author     Pirama Arumuga Nainar <pirama@google.com>  2015-05-06 11:46:36 -0700
committer  Pirama Arumuga Nainar <pirama@google.com>  2015-05-18 10:52:30 -0700
commit     2c3e0051c31c3f5b2328b447eadf1cf9c4427442 (patch)
tree       c0104029af14e9f47c2ef58ca60e6137691f3c9b
parent     e1bc145815f4334641be19f1c45ecf85d25b6e5a (diff)
download   external_llvm-2c3e0051c31c3f5b2328b447eadf1cf9c4427442.zip
           external_llvm-2c3e0051c31c3f5b2328b447eadf1cf9c4427442.tar.gz
           external_llvm-2c3e0051c31c3f5b2328b447eadf1cf9c4427442.tar.bz2
Update aosp/master LLVM for rebase to r235153
Change-Id: I9bf53792f9fc30570e81a8d80d296c681d005ea7
(cherry picked from commit 0c7f116bb6950ef819323d855415b2f2b0aad987)
-rw-r--r--CMakeLists.txt12
-rw-r--r--Makefile.config.in5
-rw-r--r--autoconf/configure.ac12
-rw-r--r--bindings/go/llvm/DIBuilderBindings.cpp77
-rw-r--r--cmake/modules/AddLLVM.cmake91
-rw-r--r--cmake/modules/HandleLLVMOptions.cmake10
-rw-r--r--cmake/modules/LLVMConfig.cmake.in2
-rw-r--r--cmake/modules/Makefile7
-rwxr-xr-xconfigure24
-rw-r--r--docs/Bugpoint.rst2
-rw-r--r--docs/ExceptionHandling.rst40
-rw-r--r--docs/ExtendingLLVM.rst97
-rw-r--r--docs/Extensions.rst23
-rw-r--r--docs/LangRef.rst35
-rw-r--r--docs/LibFuzzer.rst364
-rw-r--r--docs/Phabricator.rst19
-rw-r--r--docs/ProgrammersManual.rst13
-rw-r--r--docs/R600Usage.rst60
-rw-r--r--docs/Vectorizers.rst2
-rw-r--r--docs/YamlIO.rst4
-rw-r--r--docs/index.rst4
-rw-r--r--docs/tutorial/LangImpl5.rst4
-rw-r--r--docs/tutorial/LangImpl7.rst2
-rw-r--r--docs/tutorial/OCamlLangImpl5.rst2
-rw-r--r--examples/BrainF/BrainF.cpp2
-rw-r--r--examples/ExceptionDemo/CMakeLists.txt3
-rw-r--r--examples/ExceptionDemo/ExceptionDemo.cpp2
-rw-r--r--examples/Kaleidoscope/Chapter3/toy.cpp8
-rw-r--r--examples/Kaleidoscope/Chapter4/toy.cpp12
-rw-r--r--examples/Kaleidoscope/Chapter5/CMakeLists.txt1
-rw-r--r--examples/Kaleidoscope/Chapter5/toy.cpp12
-rw-r--r--examples/Kaleidoscope/Chapter6/CMakeLists.txt1
-rw-r--r--examples/Kaleidoscope/Chapter6/toy.cpp14
-rw-r--r--examples/Kaleidoscope/Chapter7/CMakeLists.txt3
-rw-r--r--examples/Kaleidoscope/Chapter7/toy.cpp16
-rw-r--r--examples/Kaleidoscope/Chapter8/CMakeLists.txt3
-rw-r--r--examples/Kaleidoscope/Chapter8/toy.cpp62
-rw-r--r--examples/Kaleidoscope/Orc/fully_lazy/toy.cpp43
-rw-r--r--examples/Kaleidoscope/Orc/initial/toy.cpp20
-rw-r--r--examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp17
-rw-r--r--examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp32
-rw-r--r--include/llvm-c/Core.h1
-rw-r--r--include/llvm/ADT/DenseMap.h15
-rw-r--r--include/llvm/ADT/DepthFirstIterator.h10
-rw-r--r--include/llvm/ADT/EpochTracker.h2
-rw-r--r--include/llvm/ADT/Hashing.h2
-rw-r--r--include/llvm/ADT/PostOrderIterator.h33
-rw-r--r--include/llvm/ADT/Twine.h2
-rw-r--r--include/llvm/ADT/iterator.h4
-rw-r--r--include/llvm/Analysis/AliasAnalysis.h35
-rw-r--r--include/llvm/Analysis/AssumptionCache.h2
-rw-r--r--include/llvm/Analysis/BlockFrequencyInfo.h2
-rw-r--r--include/llvm/Analysis/BlockFrequencyInfoImpl.h3
-rw-r--r--include/llvm/Analysis/CallGraph.h2
-rw-r--r--include/llvm/Analysis/DependenceAnalysis.h2
-rw-r--r--include/llvm/Analysis/InlineCost.h2
-rw-r--r--include/llvm/Analysis/JumpInstrTableInfo.h2
-rw-r--r--include/llvm/Analysis/LazyValueInfo.h2
-rw-r--r--include/llvm/Analysis/LibCallAliasAnalysis.h4
-rw-r--r--include/llvm/Analysis/LoopAccessAnalysis.h15
-rw-r--r--include/llvm/Analysis/LoopInfo.h13
-rw-r--r--include/llvm/Analysis/LoopInfoImpl.h35
-rw-r--r--include/llvm/Analysis/MemoryDependenceAnalysis.h2
-rw-r--r--include/llvm/Analysis/Passes.h7
-rw-r--r--include/llvm/Analysis/PostDominators.h2
-rw-r--r--include/llvm/Analysis/RegionInfo.h6
-rw-r--r--include/llvm/Analysis/RegionInfoImpl.h8
-rw-r--r--include/llvm/Analysis/ScalarEvolution.h4
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpander.h119
-rw-r--r--include/llvm/Analysis/TargetFolder.h25
-rw-r--r--include/llvm/Analysis/TargetTransformInfo.h18
-rw-r--r--include/llvm/Analysis/TargetTransformInfoImpl.h4
-rw-r--r--include/llvm/Bitcode/BitCodes.h2
-rw-r--r--include/llvm/Bitcode/BitcodeWriterPass.h14
-rw-r--r--include/llvm/Bitcode/LLVMBitCodes.h3
-rw-r--r--include/llvm/Bitcode/ReaderWriter.h15
-rw-r--r--include/llvm/CodeGen/AsmPrinter.h2
-rw-r--r--include/llvm/CodeGen/BasicTTIImpl.h2
-rw-r--r--include/llvm/CodeGen/GCStrategy.h6
-rw-r--r--include/llvm/CodeGen/LexicalScopes.h52
-rw-r--r--include/llvm/CodeGen/LiveIntervalAnalysis.h2
-rw-r--r--include/llvm/CodeGen/LiveRangeEdit.h2
-rw-r--r--include/llvm/CodeGen/MachineBlockFrequencyInfo.h2
-rw-r--r--include/llvm/CodeGen/MachineDominators.h2
-rw-r--r--include/llvm/CodeGen/MachineFrameInfo.h4
-rw-r--r--include/llvm/CodeGen/MachineFunctionAnalysis.h2
-rw-r--r--include/llvm/CodeGen/MachineInstr.h4
-rw-r--r--include/llvm/CodeGen/MachineInstrBuilder.h13
-rw-r--r--include/llvm/CodeGen/MachineModuleInfo.h49
-rw-r--r--include/llvm/CodeGen/MachineModuleInfoImpls.h16
-rw-r--r--include/llvm/CodeGen/MachinePassRegistry.h2
-rw-r--r--include/llvm/CodeGen/MachinePostDominators.h2
-rw-r--r--include/llvm/CodeGen/MachineRegionInfo.h6
-rw-r--r--include/llvm/CodeGen/MachineRegisterInfo.h17
-rw-r--r--include/llvm/CodeGen/MachineScheduler.h4
-rw-r--r--include/llvm/CodeGen/Passes.h2
-rw-r--r--include/llvm/CodeGen/RegisterPressure.h15
-rw-r--r--include/llvm/CodeGen/ScheduleDAGInstrs.h2
-rw-r--r--include/llvm/CodeGen/SelectionDAG.h304
-rw-r--r--include/llvm/CodeGen/SelectionDAGISel.h2
-rw-r--r--include/llvm/CodeGen/SelectionDAGNodes.h34
-rw-r--r--include/llvm/CodeGen/TargetLoweringObjectFileImpl.h7
-rw-r--r--include/llvm/CodeGen/WinEHFuncInfo.h153
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFUnit.h2
-rw-r--r--include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h2
-rw-r--r--include/llvm/ExecutionEngine/ExecutionEngine.h71
-rw-r--r--include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h96
-rw-r--r--include/llvm/ExecutionEngine/Orc/ExecutionUtils.h182
-rw-r--r--include/llvm/ExecutionEngine/Orc/IRCompileLayer.h14
-rw-r--r--include/llvm/ExecutionEngine/Orc/IRTransformLayer.h101
-rw-r--r--include/llvm/ExecutionEngine/Orc/IndirectionUtils.h54
-rw-r--r--include/llvm/ExecutionEngine/Orc/JITSymbol.h2
-rw-r--r--include/llvm/ExecutionEngine/Orc/LambdaResolver.h62
-rw-r--r--include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h45
-rw-r--r--include/llvm/ExecutionEngine/Orc/LookasideRTDyldMM.h92
-rw-r--r--include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h105
-rw-r--r--include/llvm/ExecutionEngine/RTDyldMemoryManager.h138
-rw-r--r--include/llvm/ExecutionEngine/RuntimeDyld.h114
-rw-r--r--include/llvm/ExecutionEngine/SectionMemoryManager.h4
-rw-r--r--include/llvm/IR/Attributes.h26
-rw-r--r--include/llvm/IR/BasicBlock.h46
-rw-r--r--include/llvm/IR/CallSite.h31
-rw-r--r--include/llvm/IR/ConstantFolder.h25
-rw-r--r--include/llvm/IR/Constants.h26
-rw-r--r--include/llvm/IR/DIBuilder.h438
-rw-r--r--include/llvm/IR/DebugInfo.h1460
-rw-r--r--include/llvm/IR/DebugInfoMetadata.h884
-rw-r--r--include/llvm/IR/DebugLoc.h116
-rw-r--r--include/llvm/IR/Function.h31
-rw-r--r--include/llvm/IR/GVMaterializer.h1
-rw-r--r--include/llvm/IR/GlobalObject.h2
-rw-r--r--include/llvm/IR/GlobalValue.h16
-rw-r--r--include/llvm/IR/GlobalVariable.h2
-rw-r--r--include/llvm/IR/IRBuilder.h78
-rw-r--r--include/llvm/IR/IRPrintingPasses.h7
-rw-r--r--include/llvm/IR/InlineAsm.h2
-rw-r--r--include/llvm/IR/InstrTypes.h4
-rw-r--r--include/llvm/IR/Instruction.h42
-rw-r--r--include/llvm/IR/Instructions.h50
-rw-r--r--include/llvm/IR/Intrinsics.td5
-rw-r--r--include/llvm/IR/IntrinsicsBPF.td2
-rw-r--r--include/llvm/IR/IntrinsicsPowerPC.td36
-rw-r--r--include/llvm/IR/IntrinsicsSystemZ.td46
-rw-r--r--include/llvm/IR/IntrinsicsX86.td57
-rw-r--r--include/llvm/IR/LegacyPassManager.h4
-rw-r--r--include/llvm/IR/LegacyPassNameParser.h2
-rw-r--r--include/llvm/IR/Metadata.h85
-rw-r--r--include/llvm/IR/Module.h7
-rw-r--r--include/llvm/IR/NoFolder.h25
-rw-r--r--include/llvm/IR/Operator.h14
-rw-r--r--include/llvm/IR/PatternMatch.h49
-rw-r--r--include/llvm/IR/Type.h4
-rw-r--r--include/llvm/IR/UseListOrder.h6
-rw-r--r--include/llvm/IR/User.h4
-rw-r--r--include/llvm/IR/ValueMap.h17
-rw-r--r--include/llvm/InitializePasses.h3
-rw-r--r--include/llvm/LTO/LTOCodeGenerator.h3
-rw-r--r--include/llvm/LineEditor/LineEditor.h2
-rw-r--r--include/llvm/LinkAllPasses.h7
-rw-r--r--include/llvm/MC/ConstantPools.h3
-rw-r--r--include/llvm/MC/MCAsmBackend.h57
-rw-r--r--include/llvm/MC/MCAsmInfoELF.h3
-rw-r--r--include/llvm/MC/MCAsmLayout.h5
-rw-r--r--include/llvm/MC/MCAssembler.h16
-rw-r--r--include/llvm/MC/MCContext.h88
-rw-r--r--include/llvm/MC/MCELFObjectWriter.h18
-rw-r--r--include/llvm/MC/MCELFStreamer.h27
-rw-r--r--include/llvm/MC/MCInstPrinter.h11
-rw-r--r--include/llvm/MC/MCLinkerOptimizationHint.h2
-rw-r--r--include/llvm/MC/MCMachObjectWriter.h8
-rw-r--r--include/llvm/MC/MCObjectStreamer.h17
-rw-r--r--include/llvm/MC/MCObjectWriter.h35
-rw-r--r--include/llvm/MC/MCParser/AsmLexer.h2
-rw-r--r--include/llvm/MC/MCSectionCOFF.h2
-rw-r--r--include/llvm/MC/MCSectionELF.h19
-rw-r--r--include/llvm/MC/MCStreamer.h7
-rw-r--r--include/llvm/MC/MCSymbol.h26
-rw-r--r--include/llvm/MC/MCTargetAsmParser.h4
-rw-r--r--include/llvm/MC/MCWinCOFFObjectWriter.h3
-rw-r--r--include/llvm/MC/MCWinCOFFStreamer.h3
-rw-r--r--include/llvm/MC/SubtargetFeature.h2
-rw-r--r--include/llvm/Object/IRObjectFile.h2
-rw-r--r--include/llvm/Object/SymbolicFile.h2
-rw-r--r--include/llvm/Option/ArgList.h4
-rw-r--r--include/llvm/Pass.h4
-rw-r--r--include/llvm/Support/Allocator.h3
-rw-r--r--include/llvm/Support/Compiler.h53
-rw-r--r--include/llvm/Support/ELFRelocs/Mips.def1
-rw-r--r--include/llvm/Support/FileSystem.h2
-rw-r--r--include/llvm/Support/FormattedStream.h72
-rw-r--r--include/llvm/Support/GenericDomTree.h53
-rw-r--r--include/llvm/Support/MathExtras.h20
-rw-r--r--include/llvm/Support/OnDiskHashTable.h7
-rw-r--r--include/llvm/Support/Options.h2
-rw-r--r--include/llvm/Support/Signals.h2
-rw-r--r--include/llvm/Support/TargetRegistry.h42
-rw-r--r--include/llvm/Support/ToolOutputFile.h21
-rw-r--r--include/llvm/Support/YAMLTraits.h8
-rw-r--r--include/llvm/Support/circular_raw_ostream.h23
-rw-r--r--include/llvm/Support/raw_os_ostream.h2
-rw-r--r--include/llvm/Support/raw_ostream.h231
-rw-r--r--include/llvm/TableGen/Record.h2
-rw-r--r--include/llvm/Target/Target.td11
-rw-r--r--include/llvm/Target/TargetMachine.h112
-rw-r--r--include/llvm/Target/TargetOptions.h6
-rw-r--r--include/llvm/Target/TargetRegisterInfo.h3
-rw-r--r--include/llvm/Target/TargetSubtargetInfo.h27
-rw-r--r--include/llvm/Transforms/Scalar.h17
-rw-r--r--include/llvm/Transforms/Utils/LoopUtils.h4
-rw-r--r--include/llvm/Transforms/Utils/ModuleUtils.h7
-rw-r--r--include/llvm/Transforms/Utils/SymbolRewriter.h3
-rw-r--r--include/llvm/Transforms/Utils/UnrollLoop.h10
-rw-r--r--lib/Analysis/AliasAnalysis.cpp38
-rw-r--r--lib/Analysis/AliasAnalysisCounter.cpp2
-rw-r--r--lib/Analysis/AliasAnalysisEvaluator.cpp2
-rw-r--r--lib/Analysis/AliasSetTracker.cpp2
-rw-r--r--lib/Analysis/Analysis.cpp1
-rw-r--r--lib/Analysis/Android.mk1
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp4
-rw-r--r--lib/Analysis/BlockFrequencyInfo.cpp2
-rw-r--r--lib/Analysis/BlockFrequencyInfoImpl.cpp54
-rw-r--r--lib/Analysis/BranchProbabilityInfo.cpp30
-rw-r--r--lib/Analysis/CFGPrinter.cpp4
-rw-r--r--lib/Analysis/CFLAliasAnalysis.cpp4
-rw-r--r--lib/Analysis/CMakeLists.txt1
-rw-r--r--lib/Analysis/ConstantFolding.cpp25
-rw-r--r--lib/Analysis/DivergenceAnalysis.cpp337
-rw-r--r--lib/Analysis/IPA/CallGraphSCCPass.cpp11
-rw-r--r--lib/Analysis/IPA/GlobalsModRef.cpp2
-rw-r--r--lib/Analysis/IPA/InlineCost.cpp37
-rw-r--r--lib/Analysis/InstructionSimplify.cpp24
-rw-r--r--lib/Analysis/LoopAccessAnalysis.cpp42
-rw-r--r--lib/Analysis/MemDepPrinter.cpp2
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp20
-rw-r--r--lib/Analysis/ModuleDebugInfoPrinter.cpp50
-rw-r--r--lib/Analysis/RegionPass.cpp2
-rw-r--r--lib/Analysis/RegionPrinter.cpp2
-rw-r--r--lib/Analysis/ScalarEvolution.cpp66
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp72
-rw-r--r--lib/Analysis/TargetLibraryInfo.cpp4
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp4
-rw-r--r--lib/Analysis/ValueTracking.cpp7
-rw-r--r--lib/AsmParser/LLLexer.cpp1
-rw-r--r--lib/AsmParser/LLLexer.h1
-rw-r--r--lib/AsmParser/LLParser.cpp105
-rw-r--r--lib/AsmParser/LLParser.h2
-rw-r--r--lib/AsmParser/LLToken.h1
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp42
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp78
-rw-r--r--lib/Bitcode/Writer/BitcodeWriterPass.cpp16
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp20
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.h5
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.h2
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp38
-rw-r--r--lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp22
-rw-r--r--lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h14
-rw-r--r--lib/CodeGen/AsmPrinter/DebugLocEntry.h29
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp130
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp299
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h55
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h4
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfExpression.cpp28
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfExpression.h3
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfFile.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.cpp492
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.h4
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.cpp28
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.h6
-rw-r--r--lib/CodeGen/AsmPrinter/Win64Exception.cpp274
-rw-r--r--lib/CodeGen/AsmPrinter/Win64Exception.h9
-rw-r--r--lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp29
-rw-r--r--lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h2
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp104
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.h2
-rw-r--r--lib/CodeGen/EarlyIfConversion.cpp5
-rw-r--r--lib/CodeGen/GCMetadata.cpp4
-rw-r--r--lib/CodeGen/GCRootLowering.cpp17
-rw-r--r--lib/CodeGen/GlobalMerge.cpp6
-rw-r--r--lib/CodeGen/InlineSpiller.cpp4
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp19
-rw-r--r--lib/CodeGen/LexicalScopes.cpp105
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp69
-rw-r--r--lib/CodeGen/LiveDebugVariables.h2
-rw-r--r--lib/CodeGen/LiveInterval.cpp56
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp2
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp6
-rw-r--r--lib/CodeGen/MachineFunction.cpp4
-rw-r--r--lib/CodeGen/MachineInstr.cpp27
-rw-r--r--lib/CodeGen/MachineLICM.cpp240
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp29
-rw-r--r--lib/CodeGen/MachineModuleInfoImpls.cpp9
-rw-r--r--lib/CodeGen/MachineScheduler.cpp4
-rw-r--r--lib/CodeGen/MachineTraceMetrics.cpp16
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp2
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp20
-rw-r--r--lib/CodeGen/RegAllocFast.cpp13
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp9
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp48
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp339
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp17
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp209
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp55
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp35
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp5
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h2
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp57
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp127
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h8
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp30
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp123
-rw-r--r--lib/CodeGen/SelectionDAG/StatepointLowering.cpp68
-rw-r--r--lib/CodeGen/ShadowStackGCLowering.cpp4
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp43
-rw-r--r--lib/CodeGen/SpillPlacement.h2
-rw-r--r--lib/CodeGen/StackColoring.cpp2
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp19
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp21
-rw-r--r--lib/CodeGen/WinEHPrepare.cpp981
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugFrame.cpp6
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolFunc.cpp11
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp12
-rw-r--r--lib/ExecutionEngine/EventListenerCommon.h68
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp176
-rw-r--r--lib/ExecutionEngine/ExecutionEngineBindings.cpp2
-rw-r--r--lib/ExecutionEngine/GDBRegistrationListener.cpp2
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp3
-rw-r--r--lib/ExecutionEngine/Interpreter/Execution.cpp16
-rw-r--r--lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp9
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.h2
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.cpp84
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.h84
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp2
-rw-r--r--lib/ExecutionEngine/Orc/Android.mk1
-rw-r--r--lib/ExecutionEngine/Orc/CMakeLists.txt1
-rw-r--r--lib/ExecutionEngine/Orc/ExecutionUtils.cpp102
-rw-r--r--lib/ExecutionEngine/Orc/IndirectionUtils.cpp63
-rw-r--r--lib/ExecutionEngine/Orc/OrcMCJITReplacement.h124
-rw-r--r--lib/ExecutionEngine/Orc/OrcTargetSupport.cpp100
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp95
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp6
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h10
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp2
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h1
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp198
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h35
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h18
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp52
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h14
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h8
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h5
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h4
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h5
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h5
-rw-r--r--lib/Fuzzer/CMakeLists.txt8
-rw-r--r--lib/Fuzzer/FuzzerDFSan.cpp275
-rw-r--r--lib/Fuzzer/FuzzerDriver.cpp78
-rw-r--r--lib/Fuzzer/FuzzerFlags.def51
-rw-r--r--lib/Fuzzer/FuzzerIO.cpp6
-rw-r--r--lib/Fuzzer/FuzzerInternal.h30
-rw-r--r--lib/Fuzzer/FuzzerLoop.cpp138
-rw-r--r--lib/Fuzzer/FuzzerUtil.cpp13
-rw-r--r--lib/Fuzzer/README.txt112
-rw-r--r--lib/Fuzzer/cxx_fuzzer_tokens.txt218
-rw-r--r--lib/Fuzzer/dfsan_fuzzer_abi.list12
-rw-r--r--lib/Fuzzer/test/CMakeLists.txt15
-rw-r--r--lib/Fuzzer/test/CxxTokensTest.cpp24
-rw-r--r--lib/Fuzzer/test/dfsan/CMakeLists.txt17
-rw-r--r--lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp30
-rw-r--r--lib/Fuzzer/test/fuzzer.test6
-rw-r--r--lib/IR/Android.mk1
-rw-r--r--lib/IR/AsmWriter.cpp156
-rw-r--r--lib/IR/AttributeImpl.h8
-rw-r--r--lib/IR/Attributes.cpp85
-rw-r--r--lib/IR/AutoUpgrade.cpp51
-rw-r--r--lib/IR/BasicBlock.cpp4
-rw-r--r--lib/IR/CMakeLists.txt1
-rw-r--r--lib/IR/ConstantFold.cpp39
-rw-r--r--lib/IR/Constants.cpp27
-rw-r--r--lib/IR/Core.cpp16
-rw-r--r--lib/IR/DIBuilder.cpp702
-rw-r--r--lib/IR/DebugInfo.cpp828
-rw-r--r--lib/IR/DebugInfoMetadata.cpp148
-rw-r--r--lib/IR/DebugLoc.cpp126
-rw-r--r--lib/IR/DiagnosticInfo.cpp12
-rw-r--r--lib/IR/Function.cpp13
-rw-r--r--lib/IR/GCOV.cpp2
-rw-r--r--lib/IR/IRBuilder.cpp3
-rw-r--r--lib/IR/IRPrintingPasses.cpp19
-rw-r--r--lib/IR/InlineAsm.cpp4
-rw-r--r--lib/IR/Instruction.cpp4
-rw-r--r--lib/IR/Instructions.cpp38
-rw-r--r--lib/IR/LLVMContextImpl.h189
-rw-r--r--lib/IR/LegacyPassManager.cpp2
-rw-r--r--lib/IR/Metadata.cpp8
-rw-r--r--lib/IR/UseListOrder.cpp43
-rw-r--r--lib/IR/Value.cpp4
-rw-r--r--lib/IR/Verifier.cpp566
-rw-r--r--lib/LTO/LTOCodeGenerator.cpp14
-rw-r--r--lib/LTO/LTOModule.cpp2
-rw-r--r--lib/Linker/LinkModules.cpp10
-rw-r--r--lib/MC/ELFObjectWriter.cpp349
-rw-r--r--lib/MC/MCAsmStreamer.cpp24
-rw-r--r--lib/MC/MCAssembler.cpp61
-rw-r--r--lib/MC/MCContext.cpp100
-rw-r--r--lib/MC/MCDisassembler/Disassembler.cpp50
-rw-r--r--lib/MC/MCDwarf.cpp2
-rw-r--r--lib/MC/MCELFObjectTargetWriter.cpp21
-rw-r--r--lib/MC/MCELFStreamer.cpp92
-rw-r--r--lib/MC/MCExpr.cpp4
-rw-r--r--lib/MC/MCMachOStreamer.cpp4
-rw-r--r--lib/MC/MCObjectStreamer.cpp23
-rw-r--r--lib/MC/MCObjectWriter.cpp20
-rw-r--r--lib/MC/MCParser/AsmParser.cpp4
-rw-r--r--lib/MC/MCParser/ELFAsmParser.cpp14
-rw-r--r--lib/MC/MCSectionELF.cpp6
-rw-r--r--lib/MC/MCSubtargetInfo.cpp7
-rw-r--r--lib/MC/MCSymbol.cpp8
-rw-r--r--lib/MC/MachObjectWriter.cpp24
-rw-r--r--lib/MC/SubtargetFeature.cpp7
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp29
-rw-r--r--lib/MC/WinCOFFStreamer.cpp2
-rw-r--r--lib/Object/COFFObjectFile.cpp2
-rw-r--r--lib/Object/MachOObjectFile.cpp4
-rw-r--r--lib/Option/ArgList.cpp2
-rw-r--r--lib/Support/APInt.cpp19
-rw-r--r--lib/Support/CommandLine.cpp4
-rw-r--r--lib/Support/DataStream.cpp4
-rw-r--r--lib/Support/Debug.cpp6
-rw-r--r--lib/Support/FoldingSet.cpp4
-rw-r--r--lib/Support/GraphWriter.cpp25
-rw-r--r--lib/Support/Host.cpp194
-rw-r--r--lib/Support/Process.cpp18
-rw-r--r--lib/Support/Regex.cpp3
-rw-r--r--lib/Support/Triple.cpp4
-rw-r--r--lib/Support/Unix/Signals.inc4
-rw-r--r--lib/Support/Windows/Path.inc1
-rw-r--r--lib/Support/Windows/Signals.inc2
-rw-r--r--lib/Support/Windows/TimeValue.inc1
-rw-r--r--lib/Support/raw_ostream.cpp89
-rw-r--r--lib/TableGen/Record.cpp2
-rw-r--r--lib/TableGen/TGLexer.h3
-rw-r--r--lib/Target/AArch64/AArch64.td16
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp27
-rw-r--r--lib/Target/AArch64/AArch64CollectLOH.cpp2
-rw-r--r--lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp9
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp16
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp182
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.h2
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp2
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp234
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h5
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td215
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp6
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td54
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.cpp30
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.h3
-rw-r--r--lib/Target/AArch64/AArch64SchedA57.td20
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp3
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h6
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp9
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp32
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp13
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp96
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h131
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp8
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp10
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp15
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h9
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp6
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.cpp1429
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h78
-rw-r--r--lib/Target/ARM/ARM.td36
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp32
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp21
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h2
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp3
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp27
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td76
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td18
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td43
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp6
-rw-r--r--lib/Target/ARM/ARMSubtarget.h8
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp9
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp63
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp56
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp729
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h223
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp16
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp12
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp22
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp13
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h27
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp12
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp5
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp7
-rw-r--r--lib/Target/BPF/BPFISelDAGToDAG.cpp2
-rw-r--r--lib/Target/BPF/BPFISelLowering.cpp15
-rw-r--r--lib/Target/BPF/BPFInstrInfo.td57
-rw-r--r--lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp2
-rw-r--r--lib/Target/BPF/InstPrinter/BPFInstPrinter.h3
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp4
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp2
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp5
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp11
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h3
-rw-r--r--lib/Target/BPF/MCTargetDesc/LLVMBuild.txt2
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp24
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h2
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt1
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp15
-rw-r--r--lib/Target/Hexagon/HexagonExpandCondsets.cpp1348
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp11
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp5
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h4
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp24
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp8
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp5
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h3
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp11
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h5
-rw-r--r--lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp2
-rw-r--r--lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h3
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp5
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp1
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h6
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.cpp4
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp76
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp2
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.h3
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp3
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp185
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp13
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h9
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h5
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h3
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp8
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp17
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.cpp19
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.h4
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.cpp3
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.h2
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp31
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp2
-rw-r--r--lib/Target/Mips/MipsFrameLowering.cpp17
-rw-r--r--lib/Target/Mips/MipsFrameLowering.h5
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp1
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h4
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td10
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp22
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h5
-rw-r--r--lib/Target/Mips/MipsOptionRecord.h2
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp20
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.h4
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp15
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp3
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.h2
-rw-r--r--lib/Target/Mips/MipsTargetStreamer.h3
-rw-r--r--lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp10
-rw-r--r--lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h5
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTX.td22
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp77
-rw-r--r--lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp5
-rw-r--r--lib/Target/NVPTX/NVPTXGenericToNVVM.cpp1
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp14
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.h2
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp70
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.h2
-rw-r--r--lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp52
-rw-r--r--lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp2
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp2
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h3
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp5
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp8
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h43
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp4
-rw-r--r--lib/Target/PowerPC/PPC.td77
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp19
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp7
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp31
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp150
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h17
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td36
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td64
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td25
-rw-r--r--lib/Target/PowerPC/PPCLoopDataPrefetch.cpp9
-rw-r--r--lib/Target/PowerPC/PPCLoopPreIncPrep.cpp39
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp9
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h6
-rw-r--r--lib/Target/PowerPC/PPCTargetStreamer.h2
-rw-r--r--lib/Target/PowerPC/README.txt19
-rw-r--r--lib/Target/PowerPC/README_ALTIVEC.txt2
-rw-r--r--lib/Target/R600/AMDGPU.td24
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.cpp22
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.h4
-rw-r--r--lib/Target/R600/AMDGPUISelDAGToDAG.cpp2
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp12
-rw-r--r--lib/Target/R600/AMDGPUInstructions.td4
-rw-r--r--lib/Target/R600/AMDGPUMCInstLower.cpp3
-rw-r--r--lib/Target/R600/AMDGPUPromoteAlloca.cpp4
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.cpp1
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h3
-rw-r--r--lib/Target/R600/AMDILCFGStructurizer.cpp2
-rw-r--r--lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp1094
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp19
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h5
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp4
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp2
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp5
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h3
-rw-r--r--lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp2
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp4
-rw-r--r--lib/Target/R600/R600Instructions.td5
-rw-r--r--lib/Target/R600/R600TextureIntrinsicsReplacer.cpp4
-rw-r--r--lib/Target/R600/SIAnnotateControlFlow.cpp19
-rw-r--r--lib/Target/R600/SIISelLowering.cpp37
-rw-r--r--lib/Target/R600/SIISelLowering.h4
-rw-r--r--lib/Target/R600/SIInstrFormats.td31
-rw-r--r--lib/Target/R600/SIInstrInfo.td274
-rw-r--r--lib/Target/R600/SIInstructions.td43
-rw-r--r--lib/Target/R600/SIRegisterInfo.td47
-rw-r--r--lib/Target/R600/SITypeRewriter.cpp2
-rw-r--r--lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp46
-rw-r--r--lib/Target/Sparc/InstPrinter/SparcInstPrinter.h40
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp2
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp8
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp2
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp13
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h4
-rw-r--r--lib/Target/Sparc/Sparc.td7
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp18
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp3
-rw-r--r--lib/Target/Sparc/SparcSelectionDAGInfo.h2
-rw-r--r--lib/Target/SystemZ/CMakeLists.txt1
-rw-r--r--lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp2
-rw-r--r--lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp3
-rw-r--r--lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h3
-rw-r--r--lib/Target/SystemZ/LLVMBuild.txt2
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp2
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp2
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp4
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp5
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h3
-rw-r--r--lib/Target/SystemZ/SystemZ.h12
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.cpp23
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.h1
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp11
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp250
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h20
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td11
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp34
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td65
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td13
-rw-r--r--lib/Target/SystemZ/SystemZProcessors.td30
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.cpp9
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h18
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.h1
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.cpp240
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.h70
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp2
-rw-r--r--lib/Target/TargetMachine.cpp2
-rw-r--r--lib/Target/TargetMachineC.cpp20
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp2
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp49
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.h11
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp3
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.h3
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp12
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp8
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp9
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h34
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp5
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp8
-rw-r--r--lib/Target/X86/X86.td20
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp23
-rw-r--r--lib/Target/X86/X86FastISel.cpp2
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp7
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp138
-rw-r--r--lib/Target/X86/X86ISelLowering.h3
-rw-r--r--lib/Target/X86/X86InstrAVX512.td155
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td8
-rw-r--r--lib/Target/X86/X86InstrCompiler.td11
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td28
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp32
-rw-r--r--lib/Target/X86/X86InstrSSE.td63
-rw-r--r--lib/Target/X86/X86IntrinsicsInfo.h14
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp5
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp2
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.h3
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp5
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp8
-rw-r--r--lib/Target/XCore/XCoreLowerThreadLocal.cpp8
-rw-r--r--lib/Target/XCore/XCoreTargetStreamer.h2
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp22
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp10
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp15
-rw-r--r--lib/Transforms/IPO/LowerBitSets.cpp16
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp7
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp26
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp155
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp36
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp148
-rw-r--r--lib/Transforms/InstCombine/InstCombineInternal.h58
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp8
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp5
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp49
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp92
-rw-r--r--lib/Transforms/Instrumentation/DataFlowSanitizer.cpp34
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp71
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp10
-rw-r--r--lib/Transforms/Instrumentation/SanitizerCoverage.cpp38
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp72
-rw-r--r--lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h2
-rw-r--r--lib/Transforms/ObjCARC/DependencyAnalysis.cpp4
-rw-r--r--lib/Transforms/Scalar/AlignmentFromAssumptions.cpp4
-rw-r--r--lib/Transforms/Scalar/Android.mk2
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt2
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp8
-rw-r--r--lib/Transforms/Scalar/Float2Int.cpp540
-rw-r--r--lib/Transforms/Scalar/GVN.cpp6
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp64
-rw-r--r--lib/Transforms/Scalar/LoadCombine.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp4
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp5
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp2
-rw-r--r--lib/Transforms/Scalar/NaryReassociate.cpp252
-rw-r--r--lib/Transforms/Scalar/PlaceSafepoints.cpp5
-rw-r--r--lib/Transforms/Scalar/RewriteStatepointsForGC.cpp1237
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp3
-rw-r--r--lib/Transforms/Scalar/SROA.cpp30
-rw-r--r--lib/Transforms/Scalar/SampleProfile.cpp11
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp2
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp9
-rw-r--r--lib/Transforms/Scalar/Scalarizer.cpp2
-rw-r--r--lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp6
-rw-r--r--lib/Transforms/Scalar/StraightLineStrengthReduce.cpp338
-rw-r--r--lib/Transforms/Scalar/StructurizeCFG.cpp2
-rw-r--r--lib/Transforms/Utils/AddDiscriminators.cpp43
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp25
-rw-r--r--lib/Transforms/Utils/GlobalStatus.cpp2
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp28
-rw-r--r--lib/Transforms/Utils/Local.cpp45
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp15
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp17
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp11
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp9
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp6
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp72
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp59
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp8
-rw-r--r--test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll2
-rw-r--r--test/Analysis/BasicAA/2008-04-15-Byval.ll2
-rw-r--r--test/Analysis/BasicAA/byval.ll2
-rw-r--r--test/Analysis/BlockFrequencyInfo/bad_input.ll3
-rw-r--r--test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll204
-rw-r--r--test/Analysis/BranchProbabilityInfo/basic.ll28
-rw-r--r--test/Analysis/CallGraph/2008-09-09-DirectCall.ll2
-rw-r--r--test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll198
-rw-r--r--test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg2
-rw-r--r--test/Analysis/GlobalsModRef/volatile-instrs.ll2
-rw-r--r--test/Analysis/LazyCallGraph/basic.ll2
-rw-r--r--test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll1
-rw-r--r--test/Analysis/LoopAccessAnalysis/safe-no-checks.ll43
-rw-r--r--test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll53
-rw-r--r--test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll54
-rw-r--r--test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll53
-rw-r--r--test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll1
-rw-r--r--test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll2
-rw-r--r--test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll2
-rw-r--r--test/Analysis/ScalarEvolution/latch-dominating-conditions.ll55
-rw-r--r--test/Analysis/ScalarEvolution/max-trip-count.ll2
-rw-r--r--test/Analysis/ScalarEvolution/zext-signed-addrec.ll2
-rw-r--r--test/Analysis/ValueTracking/memory-dereferenceable.ll4
-rw-r--r--test/Assembler/2002-07-25-ReturnPtrFunction.ll2
-rw-r--r--test/Assembler/2003-05-15-AssemblerProblem.ll4
-rw-r--r--test/Assembler/2008-01-11-VarargAttrs.ll2
-rw-r--r--test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll2
-rw-r--r--test/Assembler/debug-info.ll8
-rw-r--r--test/Assembler/invalid-mdcompileunit-null-file.ll4
-rw-r--r--test/Assembler/invalid-mdglobalvariable-empty-name.ll4
-rw-r--r--test/Assembler/invalid-mdglobalvariable-missing-name.ll4
-rw-r--r--test/Assembler/invalid-mdimportedentity-missing-scope.ll (renamed from test/Assembler/invalid-mdimportedentity-missing-parent.ll) 0
-rw-r--r--test/Assembler/invalid-mdlexicalblock-missing-scope.ll (renamed from test/Assembler/invalid-mdlexicalblock-missing-parent.ll) 0
-rw-r--r--test/Assembler/invalid-mdlexicalblock-null-scope.ll4
-rw-r--r--test/Assembler/invalid-mdlexicalblockfile-missing-scope.ll (renamed from test/Assembler/invalid-mdlexicalblockfile-missing-parent.ll) 0
-rw-r--r--test/Assembler/invalid-mdlexicalblockfile-null-scope.ll4
-rw-r--r--test/Assembler/invalid-mdlocalvariable-missing-name.ll4
-rw-r--r--test/Assembler/invalid-mdlocalvariable-missing-scope.ll4
-rw-r--r--test/Assembler/invalid-mdlocalvariable-missing-tag.ll4
-rw-r--r--test/Assembler/invalid-mdlocalvariable-null-scope.ll4
-rw-r--r--test/Assembler/invalid-mdlocation-null-scope.ll4
-rw-r--r--test/Assembler/mdglobalvariable.ll23
-rw-r--r--test/Assembler/mdimportedentity.ll8
-rw-r--r--test/Assembler/mdlexicalblock.ll12
-rw-r--r--test/Assembler/mdlocalvariable.ll22
-rw-r--r--test/Assembler/mdnamespace.ll2
-rw-r--r--test/Assembler/mdobjcproperty.ll5
-rw-r--r--test/Assembler/mdsubprogram.ll14
-rw-r--r--test/Assembler/mdsubroutinetype.ll23
-rw-r--r--test/Assembler/mdtemplateparameter.ll4
-rw-r--r--test/Assembler/metadata-null-operands.ll12
-rw-r--r--test/Assembler/musttail-invalid-1.ll2
-rw-r--r--test/Assembler/musttail-invalid-2.ll2
-rw-r--r--test/Assembler/musttail.ll4
-rw-r--r--test/Bitcode/Inputs/invalid-no-proper-module.bc bin 0 -> 612 bytes
-rw-r--r--test/Bitcode/attributes.ll6
-rw-r--r--test/Bitcode/invalid.test5
-rw-r--r--test/Bitcode/miscInstructions.3.2.ll4
-rw-r--r--test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll491
-rw-r--r--test/CodeGen/AArch64/addsub.ll28
-rw-r--r--test/CodeGen/AArch64/argument-blocks.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-aapcs.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-abi-varargs.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-anyregcc-crash.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-anyregcc.ll16
-rw-r--r--test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll172
-rw-r--r--test/CodeGen/AArch64/arm64-call-tailcalls.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll638
-rw-r--r--test/CodeGen/AArch64/arm64-convert-v4f64.ll26
-rw-r--r--test/CodeGen/AArch64/arm64-dup.ll37
-rw-r--r--test/CodeGen/AArch64/arm64-fcopysign.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-join-reserved.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll14
-rw-r--r--test/CodeGen/AArch64/arm64-neon-copy.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-neon-v8.1a.ll456
-rw-r--r--test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-patchpoint.ll16
-rw-r--r--test/CodeGen/AArch64/arm64-stackmap-nops.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-stackmap.ll22
-rw-r--r--test/CodeGen/AArch64/arm64-vshuffle.ll95
-rw-r--r--test/CodeGen/AArch64/bitcast.ll27
-rw-r--r--test/CodeGen/AArch64/br-to-eh-lpad.ll8
-rw-r--r--test/CodeGen/AArch64/concat_vector-scalar-combine.ll125
-rw-r--r--test/CodeGen/AArch64/concat_vector-truncate-combine.ll2
-rw-r--r--test/CodeGen/AArch64/concat_vector-truncated-scalar-combine.ll18
-rw-r--r--test/CodeGen/AArch64/dag-combine-invaraints.ll2
-rw-r--r--test/CodeGen/AArch64/f16-instructions.ll765
-rw-r--r--test/CodeGen/AArch64/fast-isel-int-ext5.ll19
-rw-r--r--test/CodeGen/AArch64/fold-constants.ll21
-rw-r--r--test/CodeGen/AArch64/fp16-instructions.ll109
-rw-r--r--test/CodeGen/AArch64/global-merge-1.ll12
-rw-r--r--test/CodeGen/AArch64/global-merge-2.ll6
-rw-r--r--test/CodeGen/AArch64/global-merge-3.ll6
-rw-r--r--test/CodeGen/AArch64/global-merge-4.ll2
-rw-r--r--test/CodeGen/AArch64/merge-store.ll20
-rw-r--r--test/CodeGen/AArch64/print-mrs-system-register.ll11
-rw-r--r--test/CodeGen/AArch64/sibling-call.ll2
-rw-r--r--test/CodeGen/AArch64/stackmap-liveness.ll47
-rw-r--r--test/CodeGen/AArch64/tailcall-explicit-sret.ll106
-rw-r--r--test/CodeGen/AArch64/tailcall-implicit-sret.ll46
-rw-r--r--test/CodeGen/AArch64/tailcall-mem-intrinsics.ll31
-rw-r--r--test/CodeGen/AArch64/vcvt-oversize.ll16
-rw-r--r--test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll2
-rw-r--r--test/CodeGen/ARM/2007-04-03-PEIBug.ll2
-rw-r--r--test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll4
-rw-r--r--test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-07-tailmerge-1.ll18
-rw-r--r--test/CodeGen/ARM/2007-05-09-tailmerge-2.ll18
-rw-r--r--test/CodeGen/ARM/2007-05-22-tailmerge-3.ll18
-rw-r--r--test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll4
-rw-r--r--test/CodeGen/ARM/2009-02-16-SpillerBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-08-FREM.ll2
-rw-r--r--test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll2
-rw-r--r--test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-02-ISelCrash.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll6
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll6
-rw-r--r--test/CodeGen/ARM/2009-10-16-Scope.ll2
-rw-r--r--test/CodeGen/ARM/2009-10-27-double-align.ll2
-rw-r--r--test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll4
-rw-r--r--test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll6
-rw-r--r--test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll4
-rw-r--r--test/CodeGen/ARM/2010-07-26-GlobalMerge.ll10
-rw-r--r--test/CodeGen/ARM/2010-12-15-elf-lcomm.ll5
-rw-r--r--test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll2
-rw-r--r--test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll4
-rw-r--r--test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll2
-rw-r--r--test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll6
-rw-r--r--test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll2
-rw-r--r--test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll2
-rw-r--r--test/CodeGen/ARM/2013-10-11-select-stalls.ll2
-rw-r--r--test/CodeGen/ARM/aliases.ll2
-rw-r--r--test/CodeGen/ARM/arguments.ll2
-rw-r--r--test/CodeGen/ARM/arm-asm.ll2
-rw-r--r--test/CodeGen/ARM/build-attributes.ll45
-rw-r--r--test/CodeGen/ARM/bx_fold.ll2
-rw-r--r--test/CodeGen/ARM/cache-intrinsic.ll4
-rw-r--r--test/CodeGen/ARM/compare-call.ll2
-rw-r--r--test/CodeGen/ARM/debug-info-arg.ll2
-rw-r--r--test/CodeGen/ARM/debug-info-blocks.ll8
-rw-r--r--test/CodeGen/ARM/debug-info-branch-folding.ll6
-rw-r--r--test/CodeGen/ARM/debug-info-d16-reg.ll31
-rw-r--r--test/CodeGen/ARM/debug-info-no-frame.ll36
-rw-r--r--test/CodeGen/ARM/debug-info-qreg.ll6
-rw-r--r--test/CodeGen/ARM/debug-info-s16-reg.ll19
-rw-r--r--test/CodeGen/ARM/debug-info-sreg2.ll2
-rw-r--r--test/CodeGen/ARM/div.ll24
-rw-r--r--test/CodeGen/ARM/fast-isel-vararg.ll2
-rw-r--r--test/CodeGen/ARM/fcopysign.ll2
-rw-r--r--test/CodeGen/ARM/ghc-tcreturn-lowered.ll2
-rw-r--r--test/CodeGen/ARM/global-merge-1.ll10
-rw-r--r--test/CodeGen/ARM/ifcvt6.ll2
-rw-r--r--test/CodeGen/ARM/indirectbr-2.ll2
-rw-r--r--test/CodeGen/ARM/memcpy-inline.ll12
-rw-r--r--test/CodeGen/ARM/memfunc.ll52
-rw-r--r--test/CodeGen/ARM/neon-spfp.ll2
-rw-r--r--test/CodeGen/ARM/optselect-regclass.ll2
-rw-r--r--test/CodeGen/ARM/print-memb-operand.ll12
-rw-r--r--test/CodeGen/ARM/regpair_hint_phys.ll22
-rw-r--r--test/CodeGen/ARM/saxpy10-a9.ll10
-rw-r--r--test/CodeGen/ARM/shifter_operand.ll2
-rw-r--r--test/CodeGen/ARM/stack-protector-bmovpcb_call.ll2
-rw-r--r--test/CodeGen/ARM/stm.ll4
-rw-r--r--test/CodeGen/ARM/uint64tof64.ll2
-rw-r--r--test/CodeGen/ARM/vargs.ll4
-rw-r--r--test/CodeGen/ARM/vcvt.ll4
-rw-r--r--test/CodeGen/ARM/vector-spilling.ll2
-rw-r--r--test/CodeGen/ARM/vfp.ll8
-rw-r--r--test/CodeGen/ARM/vminmaxnm.ll97
-rw-r--r--test/CodeGen/ARM/weak2.ll2
-rw-r--r--test/CodeGen/BPF/basictest.ll4
-rw-r--r--test/CodeGen/BPF/ex1.ll2
-rw-r--r--test/CodeGen/BPF/intrinsics.ll40
-rw-r--r--test/CodeGen/BPF/sanity.ll2
-rw-r--r--test/CodeGen/CPP/2009-05-01-Long-Double.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-06-BadIntCmp.ll4
-rw-r--r--test/CodeGen/Generic/2003-07-07-BadLongConst.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-08-BadCastToBool.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll4
-rw-r--r--test/CodeGen/Generic/2005-12-01-Crash.ll2
-rw-r--r--test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-04-Ctlz.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-25-NegateZero.ll2
-rw-r--r--test/CodeGen/Generic/ConstantExprLowering.ll2
-rw-r--r--test/CodeGen/Generic/PBQP.ll34
-rw-r--r--test/CodeGen/Generic/add-with-overflow-128.ll4
-rw-r--r--test/CodeGen/Generic/add-with-overflow-24.ll8
-rw-r--r--test/CodeGen/Generic/add-with-overflow.ll8
-rw-r--r--test/CodeGen/Generic/badarg6.ll2
-rw-r--r--test/CodeGen/Generic/builtin-expect.ll12
-rw-r--r--test/CodeGen/Generic/cast-fp.ll14
-rw-r--r--test/CodeGen/Generic/constindices.ll2
-rw-r--r--test/CodeGen/Generic/dbg_value.ll4
-rw-r--r--test/CodeGen/Generic/hello.ll2
-rw-r--r--test/CodeGen/Generic/negintconst.ll2
-rw-r--r--test/CodeGen/Generic/overloaded-intrinsic-name.ll20
-rw-r--r--test/CodeGen/Generic/print-add.ll6
-rw-r--r--test/CodeGen/Generic/print-arith-fp.ll26
-rw-r--r--test/CodeGen/Generic/print-arith-int.ll36
-rw-r--r--test/CodeGen/Generic/print-int.ll2
-rw-r--r--test/CodeGen/Generic/print-mul-exp.ll42
-rw-r--r--test/CodeGen/Generic/print-mul.ll6
-rw-r--r--test/CodeGen/Generic/print-shift.ll6
-rw-r--r--test/CodeGen/Hexagon/adde.ll2
-rw-r--r--test/CodeGen/Hexagon/expand-condsets-basic.ll11
-rw-r--r--test/CodeGen/Hexagon/expand-condsets-rm-segment.ll131
-rw-r--r--test/CodeGen/Hexagon/expand-condsets-undef.ll28
-rw-r--r--test/CodeGen/Hexagon/i16_VarArg.ll2
-rw-r--r--test/CodeGen/Hexagon/i1_VarArg.ll12
-rw-r--r--test/CodeGen/Hexagon/i8_VarArg.ll2
-rw-r--r--test/CodeGen/Hexagon/sube.ll2
-rw-r--r--test/CodeGen/Hexagon/tail-call-mem-intrinsics.ll31
-rw-r--r--test/CodeGen/Inputs/DbgValueOtherTargets.ll2
-rw-r--r--test/CodeGen/Mips/Fast-ISel/logopm.ll25
-rw-r--r--test/CodeGen/Mips/adjust-callstack-sp.ll20
-rw-r--r--test/CodeGen/Mips/alloca.ll2
-rw-r--r--test/CodeGen/Mips/analyzebranch.ll2
-rw-r--r--test/CodeGen/Mips/and1.ll2
-rw-r--r--test/CodeGen/Mips/atomicops.ll6
-rw-r--r--test/CodeGen/Mips/cache-intrinsic.ll4
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll18
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll10
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll2
-rw-r--r--test/CodeGen/Mips/cfi_offset.ll2
-rw-r--r--test/CodeGen/Mips/dagcombine_crash.ll25
-rw-r--r--test/CodeGen/Mips/eh-return32.ll2
-rw-r--r--test/CodeGen/Mips/eh-return64.ll2
-rw-r--r--test/CodeGen/Mips/fpbr.ll24
-rw-r--r--test/CodeGen/Mips/gprestore.ll2
-rw-r--r--test/CodeGen/Mips/helloworld.ll2
-rw-r--r--test/CodeGen/Mips/hf16call32.ll48
-rw-r--r--test/CodeGen/Mips/hfptrcall.ll8
-rw-r--r--test/CodeGen/Mips/i32k.ll4
-rw-r--r--test/CodeGen/Mips/inlineasm_constraint.ll9
-rw-r--r--test/CodeGen/Mips/inlineasm_constraint_R.ll60
-rw-r--r--test/CodeGen/Mips/internalfunc.ll6
-rw-r--r--test/CodeGen/Mips/lb1.ll2
-rw-r--r--test/CodeGen/Mips/lbu1.ll2
-rw-r--r--test/CodeGen/Mips/lh1.ll2
-rw-r--r--test/CodeGen/Mips/lhu1.ll2
-rw-r--r--test/CodeGen/Mips/mbrsize4a.ll2
-rw-r--r--test/CodeGen/Mips/micromips-addiu.ll6
-rw-r--r--test/CodeGen/Mips/micromips-andi.ll4
-rw-r--r--test/CodeGen/Mips/mips16_32_8.ll6
-rw-r--r--test/CodeGen/Mips/mips16ex.ll2
-rw-r--r--test/CodeGen/Mips/neg1.ll2
-rw-r--r--test/CodeGen/Mips/not1.ll2
-rw-r--r--test/CodeGen/Mips/octeon.ll4
-rw-r--r--test/CodeGen/Mips/or1.ll2
-rw-r--r--test/CodeGen/Mips/return-vector.ll12
-rw-r--r--test/CodeGen/Mips/sb1.ll2
-rw-r--r--test/CodeGen/Mips/selnek.ll8
-rw-r--r--test/CodeGen/Mips/sh1.ll2
-rw-r--r--test/CodeGen/Mips/sll1.ll2
-rw-r--r--test/CodeGen/Mips/sll2.ll2
-rw-r--r--test/CodeGen/Mips/sra1.ll2
-rw-r--r--test/CodeGen/Mips/sra2.ll2
-rw-r--r--test/CodeGen/Mips/srl1.ll2
-rw-r--r--test/CodeGen/Mips/srl2.ll2
-rw-r--r--test/CodeGen/Mips/stchar.ll12
-rw-r--r--test/CodeGen/Mips/stldst.ll4
-rw-r--r--test/CodeGen/Mips/sub1.ll2
-rw-r--r--test/CodeGen/Mips/sub2.ll2
-rw-r--r--test/CodeGen/Mips/tailcall.ll4
-rw-r--r--test/CodeGen/Mips/xor1.ll2
-rw-r--r--test/CodeGen/NVPTX/ptx-version-30.ll6
-rw-r--r--test/CodeGen/NVPTX/ptx-version-31.ll6
-rw-r--r--test/CodeGen/NVPTX/sm-version-30.ll1
-rw-r--r--test/CodeGen/NVPTX/sm-version-32.ll7
-rw-r--r--test/CodeGen/NVPTX/sm-version-35.ll1
-rw-r--r--test/CodeGen/NVPTX/sm-version-37.ll7
-rw-r--r--test/CodeGen/NVPTX/sm-version-50.ll7
-rw-r--r--test/CodeGen/NVPTX/sm-version-52.ll7
-rw-r--r--test/CodeGen/NVPTX/sm-version-53.ll7
-rw-r--r--test/CodeGen/NVPTX/symbol-naming.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-10-13-Miscompile.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll18
-rw-r--r--test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-09-08-unaligned.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll8
-rw-r--r--test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll6
-rw-r--r--test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll8
-rw-r--r--test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll24
-rw-r--r--test/CodeGen/PowerPC/2010-03-09-indirect-call.ll2
-rw-r--r--test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll2
-rw-r--r--test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll2
-rw-r--r--test/CodeGen/PowerPC/and-branch.ll2
-rw-r--r--test/CodeGen/PowerPC/atomic-2.ll32
-rw-r--r--test/CodeGen/PowerPC/branch-opt.ll10
-rw-r--r--test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll4
-rw-r--r--test/CodeGen/PowerPC/cr1eq.ll4
-rw-r--r--test/CodeGen/PowerPC/cr_spilling.ll2
-rw-r--r--test/CodeGen/PowerPC/ctrloop-sums.ll2
-rw-r--r--test/CodeGen/PowerPC/dbg.ll2
-rw-r--r--test/CodeGen/PowerPC/div-e-32.ll31
-rw-r--r--test/CodeGen/PowerPC/div-e-all.ll54
-rw-r--r--test/CodeGen/PowerPC/f32-to-i64.ll23
-rw-r--r--test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll15
-rw-r--r--test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll426
-rw-r--r--test/CodeGen/PowerPC/loop-data-prefetch-inner.ll66
-rw-r--r--test/CodeGen/PowerPC/loop-prep-all.ll48
-rw-r--r--test/CodeGen/PowerPC/mcm-obj-2.ll2
-rw-r--r--test/CodeGen/PowerPC/mcm-obj.ll4
-rw-r--r--test/CodeGen/PowerPC/memset-nc-le.ll24
-rw-r--r--test/CodeGen/PowerPC/memset-nc.ll48
-rw-r--r--test/CodeGen/PowerPC/optnone-crbits-i1-ret.ll37
-rw-r--r--test/CodeGen/PowerPC/pip-inner.ll52
-rw-r--r--test/CodeGen/PowerPC/ppc32-i1-vaarg.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc32-pic-large.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc32-pic.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc64-anyregcc.ll16
-rw-r--r--test/CodeGen/PowerPC/ppc64-patchpoint.ll16
-rw-r--r--test/CodeGen/PowerPC/ppc64-stackmap-nops.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc64-stackmap.ll22
-rw-r--r--test/CodeGen/PowerPC/ppcf128-3.ll8
-rw-r--r--test/CodeGen/PowerPC/remat-imm.ll2
-rw-r--r--test/CodeGen/PowerPC/resolvefi-basereg.ll4
-rw-r--r--test/CodeGen/PowerPC/s000-alias-misched.ll2
-rw-r--r--test/CodeGen/PowerPC/stack-protector.ll2
-rw-r--r--test/CodeGen/PowerPC/trampoline.ll4
-rw-r--r--test/CodeGen/PowerPC/varargs-struct-float.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_constants.ll6
-rw-r--r--test/CodeGen/PowerPC/vperm-lowering.ll57
-rw-r--r--test/CodeGen/PowerPC/vsx-fma-m.ll8
-rw-r--r--test/CodeGen/PowerPC/vsx-spill-norwstore.ll2
-rw-r--r--test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll2
-rw-r--r--test/CodeGen/R600/ds_read2.ll24
-rw-r--r--test/CodeGen/R600/ds_read2st64.ll8
-rw-r--r--test/CodeGen/R600/ds_write2.ll30
-rw-r--r--test/CodeGen/R600/ds_write2st64.ll6
-rw-r--r--test/CodeGen/R600/fmaxnum.ll91
-rw-r--r--test/CodeGen/R600/fminnum.ll91
-rw-r--r--test/CodeGen/R600/ftrunc.f64.ll2
-rw-r--r--test/CodeGen/R600/local-memory-two-objects.ll4
-rw-r--r--test/CodeGen/R600/operand-folding.ll2
-rw-r--r--test/CodeGen/R600/si-annotate-cf.ll25
-rw-r--r--test/CodeGen/R600/unaligned-load-store.ll4
-rw-r--r--test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll2
-rw-r--r--test/CodeGen/SPARC/2011-01-11-Call.ll6
-rw-r--r--test/CodeGen/SPARC/2011-01-19-DelaySlot.ll2
-rw-r--r--test/CodeGen/SPARC/64abi.ll2
-rw-r--r--test/CodeGen/SPARC/setjmp.ll2
-rw-r--r--test/CodeGen/SPARC/tls.ll2
-rw-r--r--test/CodeGen/SPARC/varargs.ll2
-rw-r--r--test/CodeGen/SystemZ/ctpop-01.ll96
-rw-r--r--test/CodeGen/SystemZ/htm-intrinsics.ll352
-rw-r--r--test/CodeGen/SystemZ/int-cmp-12.ll15
-rw-r--r--test/CodeGen/SystemZ/int-cmp-47.ll3
-rw-r--r--test/CodeGen/SystemZ/int-cmp-50.ll30
-rw-r--r--test/CodeGen/SystemZ/risbg-03.ll30
-rw-r--r--test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll31
-rw-r--r--test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll2
-rw-r--r--test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll4
-rw-r--r--test/CodeGen/Thumb/2010-07-15-debugOrdering.ll2
-rw-r--r--test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll2
-rw-r--r--test/CodeGen/Thumb/2011-06-16-NoGPRs.ll2
-rw-r--r--test/CodeGen/Thumb/asmprinter-bug.ll6
-rw-r--r--test/CodeGen/Thumb/vargs.ll2
-rw-r--r--test/CodeGen/Thumb2/2009-07-21-ISelBug.ll2
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll2
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll8
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll24
-rw-r--r--test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll8
-rw-r--r--test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll2
-rw-r--r--test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll4
-rw-r--r--test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll4
-rw-r--r--test/CodeGen/Thumb2/div.ll4
-rw-r--r--test/CodeGen/Thumb2/large-call.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-tbb.ll26
-rw-r--r--test/CodeGen/WinEH/cppeh-alloca-sink.ll180
-rw-r--r--test/CodeGen/WinEH/cppeh-catch-all.ll6
-rw-r--r--test/CodeGen/WinEH/cppeh-catch-scalar.ll8
-rw-r--r--test/CodeGen/WinEH/cppeh-catch-unwind.ll20
-rw-r--r--test/CodeGen/WinEH/cppeh-cleanup-invoke.ll91
-rw-r--r--test/CodeGen/WinEH/cppeh-frame-vars.ll8
-rw-r--r--test/CodeGen/WinEH/cppeh-inalloca.ll10
-rw-r--r--test/CodeGen/WinEH/cppeh-min-unwind.ll8
-rw-r--r--test/CodeGen/WinEH/cppeh-multi-catch.ll229
-rw-r--r--test/CodeGen/WinEH/cppeh-nested-1.ll17
-rw-r--r--test/CodeGen/WinEH/cppeh-nested-2.ll26
-rw-r--r--test/CodeGen/WinEH/cppeh-nested-3.ll44
-rw-r--r--test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll12
-rw-r--r--test/CodeGen/WinEH/cppeh-prepared-catch-all.ll47
-rw-r--r--test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll163
-rw-r--r--test/CodeGen/WinEH/cppeh-prepared-catch.ll203
-rw-r--r--test/CodeGen/WinEH/cppeh-prepared-cleanups.ll241
-rw-r--r--test/CodeGen/WinEH/seh-catch-all.ll59
-rw-r--r--test/CodeGen/WinEH/seh-inlined-finally.ll35
-rw-r--r--test/CodeGen/WinEH/seh-outlined-finally.ll155
-rw-r--r--test/CodeGen/WinEH/seh-simple.ll10
-rw-r--r--test/CodeGen/X86/2006-10-13-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll4
-rw-r--r--test/CodeGen/X86/2006-11-12-CSRetCC.ll2
-rw-r--r--test/CodeGen/X86/2006-12-19-IntelSyntax.ll26
-rw-r--r--test/CodeGen/X86/2007-02-16-BranchFold.ll2
-rw-r--r--test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-05-05-VecCastExpand.ll2
-rw-r--r--test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-07-10-StackerAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-10-15-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-02-18-TailMergingBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-09-BranchFolding.ll2
-rw-r--r--test/CodeGen/X86/2008-04-15-LiveVariableBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-12-tailmerge-5.ll10
-rw-r--r--test/CodeGen/X86/2008-07-16-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-08-06-CmpStride.ll2
-rw-r--r--test/CodeGen/X86/2008-08-31-EH_RETURN64.ll2
-rw-r--r--test/CodeGen/X86/2008-09-09-LinearScanBug.ll2
-rw-r--r--test/CodeGen/X86/2008-09-11-CoalescerBug.ll6
-rw-r--r--test/CodeGen/X86/2008-09-11-CoalescerBug2.ll2
-rw-r--r--test/CodeGen/X86/2008-10-11-CallCrash.ll6
-rw-r--r--test/CodeGen/X86/2008-10-13-CoalescerBug.ll4
-rw-r--r--test/CodeGen/X86/2008-11-06-testb.ll2
-rw-r--r--test/CodeGen/X86/2008-11-29-ULT-Sign.ll2
-rw-r--r--test/CodeGen/X86/2008-12-01-SpillerAssert.ll2
-rw-r--r--test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll2
-rw-r--r--test/CodeGen/X86/2009-02-26-MachineLICMBug.ll6
-rw-r--r--test/CodeGen/X86/2009-03-25-TestBug.ll4
-rw-r--r--test/CodeGen/X86/2009-04-13-2AddrAssert.ll2
-rw-r--r--test/CodeGen/X86/2009-04-14-IllegalRegs.ll2
-rw-r--r--test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll2
-rw-r--r--test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll2
-rw-r--r--test/CodeGen/X86/2009-10-16-Scope.ll2
-rw-r--r--test/CodeGen/X86/2010-01-18-DbgValue.ll2
-rw-r--r--test/CodeGen/X86/2010-02-01-DbgValueCrash.ll2
-rw-r--r--test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll4
-rw-r--r--test/CodeGen/X86/2010-05-25-DotDebugLoc.ll10
-rw-r--r--test/CodeGen/X86/2010-05-26-DotDebugLoc.ll2
-rw-r--r--test/CodeGen/X86/2010-05-28-Crash.ll12
-rw-r--r--test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll6
-rw-r--r--test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll2
-rw-r--r--test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll2
-rw-r--r--test/CodeGen/X86/2010-08-04-StackVariable.ll2
-rw-r--r--test/CodeGen/X86/2010-09-16-EmptyFilename.ll2
-rw-r--r--test/CodeGen/X86/2010-11-02-DbgParameter.ll2
-rw-r--r--test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll4
-rw-r--r--test/CodeGen/X86/2011-02-23-UnfoldBug.ll2
-rw-r--r--test/CodeGen/X86/2011-03-02-DAGCombiner.ll2
-rw-r--r--test/CodeGen/X86/2011-09-14-valcoalesce.ll2
-rw-r--r--test/CodeGen/X86/2011-10-12-MachineCSE.ll2
-rw-r--r--test/CodeGen/X86/2011-10-19-widen_vselect.ll2
-rw-r--r--test/CodeGen/X86/2012-01-12-extract-sv.ll19
-rw-r--r--test/CodeGen/X86/2012-07-10-extload64.ll4
-rw-r--r--test/CodeGen/X86/2012-09-28-CGPBug.ll2
-rw-r--r--test/CodeGen/X86/2012-1-10-buildvector.ll27
-rw-r--r--test/CodeGen/X86/2012-11-28-merge-store-alias.ll2
-rw-r--r--test/CodeGen/X86/2012-11-30-handlemove-dbg.ll2
-rw-r--r--test/CodeGen/X86/2012-11-30-misched-dbg.ll6
-rw-r--r--test/CodeGen/X86/2012-11-30-regpres-dbg.ll2
-rw-r--r--test/CodeGen/X86/2014-08-29-CompactUnwind.ll2
-rw-r--r--test/CodeGen/X86/GC/dynamic-frame-size.ll28
-rw-r--r--test/CodeGen/X86/MachineSink-DbgValue.ll2
-rw-r--r--test/CodeGen/X86/StackColoring-dbg.ll4
-rw-r--r--test/CodeGen/X86/add-of-carry.ll27
-rw-r--r--test/CodeGen/X86/aliases.ll2
-rw-r--r--test/CodeGen/X86/and-or-fold.ll2
-rw-r--r--test/CodeGen/X86/andimm8.ll12
-rw-r--r--test/CodeGen/X86/anyregcc-crash.ll2
-rw-r--r--test/CodeGen/X86/anyregcc.ll16
-rw-r--r--test/CodeGen/X86/atomic64.ll2
-rw-r--r--test/CodeGen/X86/avoid-loop-align.ll2
-rw-r--r--test/CodeGen/X86/avx-bitcast.ll9
-rw-r--r--test/CodeGen/X86/avx-cvt-2.ll52
-rw-r--r--test/CodeGen/X86/avx-cvt.ll90
-rw-r--r--test/CodeGen/X86/avx-shift.ll221
-rw-r--r--test/CodeGen/X86/avx-varargs-x86_64.ll2
-rw-r--r--test/CodeGen/X86/avx512-fma-intrinsics.ll46
-rw-r--r--test/CodeGen/X86/avx512-intrinsics.ll573
-rw-r--r--test/CodeGen/X86/bmi.ll2
-rw-r--r--test/CodeGen/X86/bool-zext.ll4
-rw-r--r--test/CodeGen/X86/brcond.ll4
-rw-r--r--test/CodeGen/X86/byval-align.ll2
-rw-r--r--test/CodeGen/X86/byval6.ll4
-rw-r--r--test/CodeGen/X86/cache-intrinsic.ll4
-rw-r--r--test/CodeGen/X86/cmov.ll2
-rw-r--r--test/CodeGen/X86/cmp.ll2
-rw-r--r--test/CodeGen/X86/coalescer-remat.ll2
-rw-r--r--test/CodeGen/X86/crash.ll4
-rw-r--r--test/CodeGen/X86/dag-optnone.ll73
-rw-r--r--test/CodeGen/X86/dagcombine-and-setcc.ll2
-rw-r--r--test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll20
-rw-r--r--test/CodeGen/X86/dbg-changes-codegen.ll8
-rw-r--r--test/CodeGen/X86/discontiguous-loops.ll4
-rw-r--r--test/CodeGen/X86/dllimport-x86_64.ll4
-rw-r--r--test/CodeGen/X86/dllimport.ll4
-rw-r--r--test/CodeGen/X86/early-ifcvt.ll2
-rw-r--r--test/CodeGen/X86/exedeps-movq.ll73
-rw-r--r--test/CodeGen/X86/extern_weak.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-i1.ll7
-rw-r--r--test/CodeGen/X86/fast-isel-sext.ll9
-rw-r--r--test/CodeGen/X86/fast-isel-x86-64.ll4
-rw-r--r--test/CodeGen/X86/fdiv-combine.ll31
-rw-r--r--test/CodeGen/X86/fltused.ll2
-rw-r--r--test/CodeGen/X86/fltused_function_pointer.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-O0.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-ret-store.ll4
-rw-r--r--test/CodeGen/X86/fpstack-debuginstr-kill.ll2
-rw-r--r--test/CodeGen/X86/frameescape.ll8
-rw-r--r--test/CodeGen/X86/h-registers-3.ll2
-rw-r--r--test/CodeGen/X86/hoist-common.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-duplicated-constraint.ll12
-rw-r--r--test/CodeGen/X86/invalid-shift-immediate.ll2
-rw-r--r--test/CodeGen/X86/jump_sign.ll4
-rw-r--r--test/CodeGen/X86/licm-nested.ll2
-rw-r--r--test/CodeGen/X86/licm-regpressure.ll39
-rw-r--r--test/CodeGen/X86/licm-symbol.ll4
-rw-r--r--test/CodeGen/X86/lsr-normalization.ll2
-rw-r--r--test/CodeGen/X86/machine-cse.ll2
-rw-r--r--test/CodeGen/X86/memcmp.ll12
-rw-r--r--test/CodeGen/X86/misched-code-difference-with-debug.ll14
-rw-r--r--test/CodeGen/X86/mmx-arg-passing-x86-64.ll4
-rw-r--r--test/CodeGen/X86/mmx-bitcast.ll3
-rw-r--r--test/CodeGen/X86/movtopush.ll2
-rw-r--r--test/CodeGen/X86/musttail-fastcall.ll8
-rw-r--r--test/CodeGen/X86/musttail-varargs.ll10
-rw-r--r--test/CodeGen/X86/narrow-shl-cst.ll23
-rw-r--r--test/CodeGen/X86/nontemporal-2.ll286
-rw-r--r--test/CodeGen/X86/or-branch.ll4
-rw-r--r--test/CodeGen/X86/patchpoint-webkit_jscc.ll8
-rw-r--r--test/CodeGen/X86/patchpoint.ll18
-rw-r--r--test/CodeGen/X86/phys-reg-local-regalloc.ll2
-rw-r--r--test/CodeGen/X86/pic.ll32
-rw-r--r--test/CodeGen/X86/pr1489.ll2
-rw-r--r--test/CodeGen/X86/pr18023.ll2
-rw-r--r--test/CodeGen/X86/pr23246.ll19
-rw-r--r--test/CodeGen/X86/pr2326.ll2
-rw-r--r--test/CodeGen/X86/pr2656.ll2
-rw-r--r--test/CodeGen/X86/pr2982.ll2
-rw-r--r--test/CodeGen/X86/pr3244.ll2
-rw-r--r--test/CodeGen/X86/pr3250.ll2
-rw-r--r--test/CodeGen/X86/pr3457.ll4
-rw-r--r--test/CodeGen/X86/rd-mod-wr-eflags.ll4
-rw-r--r--test/CodeGen/X86/recip-fastmath.ll48
-rw-r--r--test/CodeGen/X86/scalarize-bitcast.ll2
-rw-r--r--test/CodeGen/X86/scheduler-backtracking.ll51
-rw-r--r--test/CodeGen/X86/segmented-stacks.ll20
-rw-r--r--test/CodeGen/X86/seh-safe-div.ll6
-rw-r--r--test/CodeGen/X86/setcc.ll2
-rw-r--r--test/CodeGen/X86/shift-pair.ll2
-rw-r--r--test/CodeGen/X86/sibcall.ll4
-rw-r--r--test/CodeGen/X86/smul-with-overflow.ll8
-rw-r--r--test/CodeGen/X86/sqrt-fastmath.ll197
-rw-r--r--test/CodeGen/X86/sret-implicit.ll2
-rw-r--r--test/CodeGen/X86/sse-varargs.ll2
-rw-r--r--test/CodeGen/X86/sse2-intrinsics-x86.ll2
-rw-r--r--test/CodeGen/X86/sse41.ll27
-rw-r--r--test/CodeGen/X86/stack-folding-3dnow.ll217
-rw-r--r--test/CodeGen/X86/stack-folding-int-avx1.ll12
-rw-r--r--test/CodeGen/X86/stack-folding-int-sse42.ll39
-rw-r--r--test/CodeGen/X86/stack-folding-mmx.ll566
-rw-r--r--test/CodeGen/X86/stack-protector-dbginfo.ll7
-rw-r--r--test/CodeGen/X86/stack-protector.ll134
-rw-r--r--test/CodeGen/X86/stackmap-fast-isel.ll14
-rw-r--r--test/CodeGen/X86/stackmap-large-constants.ll4
-rw-r--r--test/CodeGen/X86/stackmap-liveness.ll10
-rw-r--r--test/CodeGen/X86/stackmap-nops.ll64
-rw-r--r--test/CodeGen/X86/stackmap-shadow-optimization.ll2
-rw-r--r--test/CodeGen/X86/stackmap.ll42
-rw-r--r--test/CodeGen/X86/statepoint-allocas.ll121
-rw-r--r--test/CodeGen/X86/statepoint-call-lowering.ll12
-rw-r--r--test/CodeGen/X86/statepoint-forward.ll8
-rw-r--r--test/CodeGen/X86/statepoint-invoke.ll4
-rw-r--r--test/CodeGen/X86/statepoint-stack-usage.ll12
-rw-r--r--test/CodeGen/X86/statepoint-stackmap-format.ll2
-rw-r--r--test/CodeGen/X86/sub-with-overflow.ll8
-rw-r--r--test/CodeGen/X86/switch-crit-edge-constant.ll26
-rw-r--r--test/CodeGen/X86/switch-or.ll2
-rw-r--r--test/CodeGen/X86/tail-call-win64.ll4
-rw-r--r--test/CodeGen/X86/tailcall-64.ll2
-rw-r--r--test/CodeGen/X86/tailcall-fastisel.ll2
-rw-r--r--test/CodeGen/X86/tailcall-mem-intrinsics.ll31
-rw-r--r--test/CodeGen/X86/twoaddr-coalesce.ll2
-rw-r--r--test/CodeGen/X86/uint64-to-float.ll4
-rw-r--r--test/CodeGen/X86/umul-with-carry.ll4
-rw-r--r--test/CodeGen/X86/unknown-location.ll2
-rw-r--r--test/CodeGen/X86/utf16-cfstrings.ll2
-rw-r--r--test/CodeGen/X86/vararg-callee-cleanup.ll12
-rw-r--r--test/CodeGen/X86/vararg_tailcall.ll10
-rw-r--r--test/CodeGen/X86/variadic-node-pic.ll2
-rw-r--r--test/CodeGen/X86/vec_cast2.ll74
-rw-r--r--test/CodeGen/X86/vec_floor.ll46
-rw-r--r--test/CodeGen/X86/vec_insert-5.ll4
-rw-r--r--test/CodeGen/X86/vec_insert-mmx.ll4
-rw-r--r--test/CodeGen/X86/vec_reassociate.ll119
-rw-r--r--test/CodeGen/X86/vec_zero_cse.ll4
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v16.ll93
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v8.ll38
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v16.ll12
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v32.ll2
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v4.ll13
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v8.ll21
-rw-r--r--test/CodeGen/X86/vector-shuffle-mmx.ll12
-rw-r--r--test/CodeGen/X86/widen_cast-1.ll6
-rw-r--r--test/CodeGen/X86/widen_cast-4.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-5.ll2
-rw-r--r--test/CodeGen/X86/widen_shuffle-1.ll2
-rw-r--r--test/CodeGen/X86/x86-64-asm.ll2
-rw-r--r--test/CodeGen/X86/x86-64-tls-1.ll2
-rw-r--r--test/CodeGen/X86/x86-64-varargs.ll2
-rw-r--r--test/CodeGen/X86/xmulo.ll6
-rw-r--r--test/CodeGen/X86/xor-icmp.ll6
-rw-r--r--test/CodeGen/XCore/llvm-intrinsics.ll6
-rw-r--r--test/DebugInfo/2009-11-03-InsertExtractValue.ll4
-rw-r--r--test/DebugInfo/2009-11-05-DeadGlobalVariable.ll2
-rw-r--r--test/DebugInfo/2009-11-10-CurrentFn.ll2
-rw-r--r--test/DebugInfo/2010-01-05-DbgScope.ll2
-rw-r--r--test/DebugInfo/2010-03-12-llc-crash.ll2
-rw-r--r--test/DebugInfo/2010-03-19-DbgDeclare.ll2
-rw-r--r--test/DebugInfo/2010-03-24-MemberFn.ll2
-rw-r--r--test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll2
-rw-r--r--test/DebugInfo/2010-04-19-FramePtr.ll2
-rw-r--r--test/DebugInfo/2010-05-03-DisableFramePtr.ll2
-rw-r--r--test/DebugInfo/2010-05-03-OriginDIE.ll10
-rw-r--r--test/DebugInfo/2010-05-10-MultipleCU.ll2
-rw-r--r--test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll8
-rw-r--r--test/DebugInfo/2010-07-19-Crash.ll2
-rw-r--r--test/DebugInfo/2010-10-01-crash.ll2
-rw-r--r--test/DebugInfo/AArch64/cfi-eof-prologue.ll2
-rw-r--r--test/DebugInfo/AArch64/frameindices.ll2
-rw-r--r--test/DebugInfo/ARM/cfi-eof-prologue.ll2
-rw-r--r--test/DebugInfo/ARM/lowerbdgdeclare_vla.ll2
-rw-r--r--test/DebugInfo/ARM/s-super-register.ll2
-rw-r--r--test/DebugInfo/Mips/InlinedFnLocalVar.ll8
-rw-r--r--test/DebugInfo/Mips/fn-call-line.ll4
-rw-r--r--test/DebugInfo/PR20038.ll11
-rw-r--r--test/DebugInfo/Sparc/gnu-window-save.ll2
-rw-r--r--test/DebugInfo/SystemZ/variable-loc.ll4
-rw-r--r--test/DebugInfo/X86/2010-04-13-PubType.ll2
-rw-r--r--test/DebugInfo/X86/DW_AT_linkage_name.ll6
-rw-r--r--test/DebugInfo/X86/DW_AT_location-reference.ll2
-rw-r--r--test/DebugInfo/X86/DW_AT_object_pointer.ll4
-rw-r--r--test/DebugInfo/X86/DW_TAG_friend.ll10
-rw-r--r--test/DebugInfo/X86/InlinedFnLocalVar.ll8
-rw-r--r--test/DebugInfo/X86/arange-and-stub.ll53
-rw-r--r--test/DebugInfo/X86/arguments.ll3
-rw-r--r--test/DebugInfo/X86/block-capture.ll2
-rw-r--r--test/DebugInfo/X86/concrete_out_of_line.ll2
-rw-r--r--test/DebugInfo/X86/cu-ranges-odr.ll3
-rw-r--r--test/DebugInfo/X86/dbg-byval-parameter.ll2
-rw-r--r--test/DebugInfo/X86/dbg-const-int.ll2
-rw-r--r--test/DebugInfo/X86/dbg-const.ll2
-rw-r--r--test/DebugInfo/X86/dbg-declare-arg.ll8
-rw-r--r--test/DebugInfo/X86/dbg-file-name.ll2
-rw-r--r--test/DebugInfo/X86/dbg-i128-const.ll2
-rw-r--r--test/DebugInfo/X86/dbg-merge-loc-entry.ll2
-rw-r--r--test/DebugInfo/X86/dbg-prolog-end.ll2
-rw-r--r--test/DebugInfo/X86/dbg-value-const-byref.ll2
-rw-r--r--test/DebugInfo/X86/dbg-value-dag-combine.ll2
-rw-r--r--test/DebugInfo/X86/dbg-value-inlined-parameter.ll6
-rw-r--r--test/DebugInfo/X86/dbg-value-isel.ll2
-rw-r--r--test/DebugInfo/X86/dbg-value-location.ll2
-rw-r--r--test/DebugInfo/X86/dbg-value-range.ll4
-rw-r--r--test/DebugInfo/X86/dbg-value-terminator.ll2
-rw-r--r--test/DebugInfo/X86/dbg_value_direct.ll8
-rw-r--r--test/DebugInfo/X86/debug-loc-asan.ll2
-rw-r--r--test/DebugInfo/X86/debug-ranges-offset.ll2
-rw-r--r--test/DebugInfo/X86/debug_frame.ll2
-rw-r--r--test/DebugInfo/X86/deleted-bit-piece.ll46
-rw-r--r--test/DebugInfo/X86/earlydup-crash.ll4
-rw-r--r--test/DebugInfo/X86/elf-names.ll8
-rw-r--r--test/DebugInfo/X86/empty-array.ll4
-rw-r--r--test/DebugInfo/X86/formal_parameter.ll4
-rw-r--r--test/DebugInfo/X86/generate-odr-hash.ll3
-rw-r--r--test/DebugInfo/X86/gnu-public-names.ll250
-rw-r--r--test/DebugInfo/X86/inline-member-function.ll3
-rw-r--r--test/DebugInfo/X86/inlined-formal-parameter.ll75
-rw-r--r--test/DebugInfo/X86/mi-print.ll56
-rw-r--r--test/DebugInfo/X86/nodebug_with_debug_loc.ll2
-rw-r--r--test/DebugInfo/X86/nondefault-subrange-array.ll4
-rw-r--r--test/DebugInfo/X86/nophysreg.ll2
-rw-r--r--test/DebugInfo/X86/parameters.ll3
-rw-r--r--test/DebugInfo/X86/pr11300.ll6
-rw-r--r--test/DebugInfo/X86/pr12831.ll60
-rw-r--r--test/DebugInfo/X86/recursive_inlining.ll6
-rw-r--r--test/DebugInfo/X86/reference-argument.ll15
-rw-r--r--test/DebugInfo/X86/rvalue-ref.ll2
-rw-r--r--test/DebugInfo/X86/sroasplit-1.ll4
-rw-r--r--test/DebugInfo/X86/sroasplit-2.ll4
-rw-r--r--test/DebugInfo/X86/stmt-list.ll2
-rw-r--r--test/DebugInfo/X86/subreg.ll4
-rw-r--r--test/DebugInfo/X86/subregisters.ll2
-rw-r--r--test/DebugInfo/X86/union-template.ll3
-rw-r--r--test/DebugInfo/array.ll2
-rw-r--r--test/DebugInfo/cross-cu-inlining.ll5
-rw-r--r--test/DebugInfo/debug-info-qualifiers.ll6
-rw-r--r--test/DebugInfo/debuginfofinder-forward-declaration.ll42
-rw-r--r--test/DebugInfo/debuginfofinder-multiple-cu.ll2
-rw-r--r--test/DebugInfo/inline-debug-info-multiret.ll2
-rw-r--r--test/DebugInfo/inline-debug-info.ll2
-rw-r--r--test/DebugInfo/inlined-arguments.ll4
-rw-r--r--test/DebugInfo/inlined-vars.ll4
-rw-r--r--test/DebugInfo/member-order.ll4
-rw-r--r--test/DebugInfo/missing-abstract-variable.ll6
-rw-r--r--test/DebugInfo/multiline.ll12
-rw-r--r--test/DebugInfo/namespace_inline_function_definition.ll5
-rw-r--r--test/DebugInfo/template-recursive-void.ll12
-rw-r--r--test/DebugInfo/tu-composite.ll3
-rw-r--r--test/DebugInfo/varargs.ll3
-rw-r--r--test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll2
-rw-r--r--test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll2
-rw-r--r--test/ExecutionEngine/MCJIT/fpbitcast.ll2
-rw-r--r--test/ExecutionEngine/MCJIT/hello-sm-pic.ll12
-rw-r--r--test/ExecutionEngine/MCJIT/hello2.ll2
-rw-r--r--test/ExecutionEngine/OrcLazy/anonymous_globals.ll18
-rw-r--r--test/ExecutionEngine/OrcLazy/hello.ll35
-rw-r--r--test/ExecutionEngine/OrcLazy/private_linkage.ll12
-rw-r--r--test/ExecutionEngine/OrcLazy/trivial_retval_1.ll26
-rw-r--r--test/ExecutionEngine/OrcMCJIT/2002-12-16-ArgTest.ll2
-rw-r--r--test/ExecutionEngine/OrcMCJIT/2008-06-05-APInt-OverAShr.ll2
-rw-r--r--test/ExecutionEngine/OrcMCJIT/fpbitcast.ll2
-rw-r--r--test/ExecutionEngine/OrcMCJIT/hello-sm-pic.ll12
-rw-r--r--test/ExecutionEngine/OrcMCJIT/hello2.ll2
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/ELF_x64-64_PIC_relocations.s31
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/Inputs/ExternalGlobal.ll2
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s7
-rw-r--r--test/ExecutionEngine/fma3-jit.ll2
-rw-r--r--test/ExecutionEngine/frem.ll2
-rw-r--r--test/ExecutionEngine/test-interp-vec-loadstore.ll6
-rw-r--r--test/Feature/aliases.ll2
-rw-r--r--test/Feature/attributes.ll2
-rw-r--r--test/Feature/paramattrs.ll2
-rw-r--r--test/Feature/testvarargs.ll2
-rw-r--r--test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll53
-rw-r--r--test/Instrumentation/AddressSanitizer/debug_info.ll2
-rw-r--r--test/Instrumentation/DataFlowSanitizer/abilist.ll8
-rw-r--r--test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll2
-rw-r--r--test/Instrumentation/MemorySanitizer/msan_basic.ll6
-rw-r--r--test/Integer/2007-01-19-TruncSext.ll2
-rw-r--r--test/LTO/X86/keep-used-puts-during-instcombine.ll2
-rw-r--r--test/LTO/X86/no-undefined-puts-when-implemented.ll2
-rw-r--r--test/Linker/2011-08-04-DebugLoc.ll2
-rw-r--r--test/Linker/2011-08-04-DebugLoc2.ll2
-rw-r--r--test/Linker/2011-08-04-Metadata.ll2
-rw-r--r--test/Linker/2011-08-04-Metadata2.ll2
-rw-r--r--test/Linker/2011-08-18-unique-class-type.ll2
-rw-r--r--test/Linker/2011-08-18-unique-class-type2.ll2
-rw-r--r--test/Linker/2011-08-18-unique-debug-type.ll2
-rw-r--r--test/Linker/2011-08-18-unique-debug-type2.ll2
-rw-r--r--test/Linker/DbgDeclare.ll5
-rw-r--r--test/Linker/DbgDeclare2.ll5
-rw-r--r--test/Linker/Inputs/basiclink.b.ll2
-rw-r--r--test/Linker/Inputs/drop-debug.bc bin 0 -> 1152 bytes
-rw-r--r--test/Linker/Inputs/type-unique-inheritance-a.ll2
-rw-r--r--test/Linker/Inputs/type-unique-inheritance-b.ll2
-rw-r--r--test/Linker/broken.ll2
-rw-r--r--test/Linker/drop-debug.ll6
-rw-r--r--test/Linker/subprogram-linkonce-weak-odr.ll10
-rw-r--r--test/Linker/subprogram-linkonce-weak.ll12
-rw-r--r--test/Linker/type-unique-odr-a.ll3
-rw-r--r--test/Linker/type-unique-odr-b.ll3
-rw-r--r--test/Linker/type-unique-simple2-a.ll9
-rw-r--r--test/Linker/type-unique-simple2-b.ll6
-rw-r--r--test/MC/AArch64/arm64-elf-reloc-condbr.s2
-rw-r--r--test/MC/AArch64/arm64-fp-encoding-error.s8
-rw-r--r--test/MC/AArch64/armv8.1a-lor.s33
-rw-r--r--test/MC/AArch64/armv8.1a-pan.s30
-rw-r--r--test/MC/AArch64/armv8.1a-rdma.s154
-rw-r--r--test/MC/AArch64/armv8.1a-vhe.s61
-rw-r--r--test/MC/AArch64/elf-extern.s2
-rw-r--r--test/MC/AArch64/elf-globaladdress.ll2
-rw-r--r--test/MC/AArch64/elf-reloc-addsubimm.s2
-rw-r--r--test/MC/AArch64/elf-reloc-ldrlit.s2
-rw-r--r--test/MC/AArch64/elf-reloc-ldstunsimm.s2
-rw-r--r--test/MC/AArch64/elf-reloc-movw.s2
-rw-r--r--test/MC/AArch64/elf-reloc-pcreladdressing.s2
-rw-r--r--test/MC/AArch64/elf-reloc-tstb.s2
-rw-r--r--test/MC/AArch64/elf-reloc-uncondbrimm.s2
-rw-r--r--test/MC/AArch64/tls-relocs.s2
-rw-r--r--test/MC/ARM/2010-11-30-reloc-movt.s1
-rw-r--r--test/MC/ARM/arm-elf-symver.s6
-rw-r--r--test/MC/ARM/basic-arm-instructions-v8.1a.s64
-rw-r--r--test/MC/ARM/eh-compact-pr0.s27
-rw-r--r--test/MC/ARM/eh-directive-handlerdata.s30
-rw-r--r--test/MC/ARM/eh-directive-personality.s50
-rw-r--r--test/MC/ARM/eh-directive-personalityindex.s97
-rw-r--r--test/MC/ARM/eh-directive-section-multiple-func.s33
-rw-r--r--test/MC/ARM/eh-directive-section.s49
-rw-r--r--test/MC/ARM/eh-link.s90
-rw-r--r--test/MC/ARM/elf-movt.s8
-rw-r--r--test/MC/ARM/elf-reloc-01.ll2
-rw-r--r--test/MC/ARM/elf-reloc-02.ll10
-rw-r--r--test/MC/ARM/elf-reloc-03.ll4
-rw-r--r--test/MC/ARM/elf-reloc-condcall.s2
-rw-r--r--test/MC/ARM/elf-thumbfunc-reloc.ll4
-rw-r--r--test/MC/ARM/elf-thumbfunc-reloc.s2
-rw-r--r--test/MC/ARM/thumb1-relax-adr.s9
-rw-r--r--test/MC/ARM/thumb1-relax-bcc.s12
-rw-r--r--test/MC/ARM/thumb1-relax-br.s19
-rw-r--r--test/MC/ARM/thumb1-relax-ldrlit.s9
-rw-r--r--test/MC/ARM/thumb2-bxj-v8.s11
-rw-r--r--test/MC/ARM/thumb2-bxj.s2
-rw-r--r--test/MC/Disassembler/AArch64/arm64-advsimd.txt37
-rw-r--r--test/MC/Disassembler/AArch64/armv8.1a-lor.txt28
-rw-r--r--test/MC/Disassembler/AArch64/armv8.1a-pan.txt10
-rw-r--r--test/MC/Disassembler/AArch64/armv8.1a-rdma.txt129
-rw-r--r--test/MC/Disassembler/AArch64/armv8.1a-vhe.txt56
-rw-r--r--test/MC/Disassembler/ARM/armv8.1a.txt16
-rw-r--r--test/MC/Disassembler/ARM/thumb-v8.1a.txt12
-rw-r--r--test/MC/Disassembler/PowerPC/ppc64-encoding.txt27
-rw-r--r--test/MC/Disassembler/PowerPC/vsx.txt14
-rw-r--r--test/MC/Disassembler/SystemZ/insns.txt174
-rw-r--r--test/MC/Disassembler/X86/x86-16.txt2
-rw-r--r--test/MC/ELF/alias.s9
-rw-r--r--test/MC/ELF/basic-elf-32.s2
-rw-r--r--test/MC/ELF/basic-elf-64.s2
-rw-r--r--test/MC/ELF/cfi-adjust-cfa-offset.s2
-rw-r--r--test/MC/ELF/cfi-advance-loc2.s2
-rw-r--r--test/MC/ELF/cfi-def-cfa-offset.s2
-rw-r--r--test/MC/ELF/cfi-def-cfa-register.s2
-rw-r--r--test/MC/ELF/cfi-def-cfa.s2
-rw-r--r--test/MC/ELF/cfi-escape.s2
-rw-r--r--test/MC/ELF/cfi-offset.s2
-rw-r--r--test/MC/ELF/cfi-register.s2
-rw-r--r--test/MC/ELF/cfi-rel-offset.s2
-rw-r--r--test/MC/ELF/cfi-rel-offset2.s2
-rw-r--r--test/MC/ELF/cfi-remember.s2
-rw-r--r--test/MC/ELF/cfi-restore.s2
-rw-r--r--test/MC/ELF/cfi-same-value.s2
-rw-r--r--test/MC/ELF/cfi-undefined.s2
-rw-r--r--test/MC/ELF/cfi-window-save.s2
-rw-r--r--test/MC/ELF/cfi-zero-addr-delta.s2
-rw-r--r--test/MC/ELF/cfi.s2
-rw-r--r--test/MC/ELF/common.s2
-rw-r--r--test/MC/ELF/compression.s1
-rw-r--r--test/MC/ELF/debug-line.s2
-rw-r--r--test/MC/ELF/ifunc-reloc.s2
-rw-r--r--test/MC/ELF/local-reloc.s2
-rw-r--r--test/MC/ELF/merge.s2
-rw-r--r--test/MC/ELF/pr19582.s8
-rw-r--r--test/MC/ELF/relocation-386.s6
-rw-r--r--test/MC/ELF/relocation-pc.s6
-rw-r--r--test/MC/ELF/rename.s37
-rw-r--r--test/MC/ELF/section-sym2.s2
-rw-r--r--test/MC/ELF/section-unique-err1.s5
-rw-r--r--test/MC/ELF/section-unique-err2.s5
-rw-r--r--test/MC/ELF/section-unique-err3.s5
-rw-r--r--test/MC/ELF/section-unique-err4.s5
-rw-r--r--test/MC/ELF/section-unique.s8
-rw-r--r--test/MC/ELF/symver-msvc.s2
-rw-r--r--test/MC/ELF/symver.s2
-rw-r--r--test/MC/ELF/tls.s2
-rw-r--r--test/MC/ELF/weak-diff2.s10
-rw-r--r--test/MC/ELF/weakref.s4
-rw-r--r--test/MC/Hexagon/inst_select.ll2
-rw-r--r--test/MC/MachO/ARM/aliased-symbols.s10
-rw-r--r--test/MC/Mips/elf-tls.s2
-rw-r--r--test/MC/Mips/insn-directive.s98
-rw-r--r--test/MC/Mips/micromips-alias.s12
-rw-r--r--test/MC/Mips/mips-expansions-bad.s8
-rw-r--r--test/MC/Mips/mips-expansions.s14
-rw-r--r--test/MC/Mips/mips-jump-delay-slots.s6
-rw-r--r--test/MC/Mips/mips1/valid.s9
-rw-r--r--test/MC/Mips/mips2/valid.s9
-rw-r--r--test/MC/Mips/mips3/valid.s9
-rw-r--r--test/MC/Mips/mips32/valid.s9
-rw-r--r--test/MC/Mips/mips32r2/valid.s9
-rw-r--r--test/MC/Mips/mips32r3/valid.s9
-rw-r--r--test/MC/Mips/mips32r5/valid.s9
-rw-r--r--test/MC/Mips/mips32r6/valid.s9
-rw-r--r--test/MC/Mips/mips4/valid.s9
-rw-r--r--test/MC/Mips/mips5/valid.s9
-rw-r--r--test/MC/Mips/mips64/valid.s9
-rw-r--r--test/MC/Mips/mips64r2/valid.s9
-rw-r--r--test/MC/Mips/mips64r3/valid.s9
-rw-r--r--test/MC/Mips/mips64r5/valid.s9
-rw-r--r--test/MC/Mips/mips64r6/valid.s9
-rw-r--r--test/MC/Mips/set-defined-symbol.s18
-rw-r--r--test/MC/Mips/sort-relocation-table.s125
-rw-r--r--test/MC/Mips/xgot.s2
-rw-r--r--test/MC/PowerPC/ppc-reloc.s2
-rw-r--r--test/MC/PowerPC/ppc64-encoding.s63
-rw-r--r--test/MC/PowerPC/tls-gd-obj.s2
-rw-r--r--test/MC/PowerPC/tls-ie-obj.s2
-rw-r--r--test/MC/PowerPC/tls-ld-obj.s2
-rw-r--r--test/MC/PowerPC/vsx.s17
-rw-r--r--test/MC/R600/ds-err.s23
-rw-r--r--test/MC/R600/ds.s337
-rw-r--r--test/MC/R600/mubuf.s352
-rw-r--r--test/MC/R600/smrd.s32
-rw-r--r--test/MC/R600/sop1-err.s37
-rw-r--r--test/MC/R600/sop1.s177
-rw-r--r--test/MC/R600/sop2.s131
-rw-r--r--test/MC/R600/sopc.s9
-rw-r--r--test/MC/R600/sopk.s66
-rw-r--r--test/MC/R600/sopp.s14
-rw-r--r--test/MC/R600/vop1.s182
-rw-r--r--test/MC/R600/vop2-err.s35
-rw-r--r--test/MC/R600/vop2.s242
-rw-r--r--test/MC/R600/vop3.s138
-rw-r--r--test/MC/R600/vopc.s40
-rw-r--r--test/MC/SystemZ/insn-bad-z196.s41
-rw-r--r--test/MC/SystemZ/insn-bad-zEC12.s84
-rw-r--r--test/MC/SystemZ/insn-bad.s5
-rw-r--r--test/MC/SystemZ/insn-good-z196.s10
-rw-r--r--test/MC/SystemZ/insn-good-zEC12.s126
-rw-r--r--test/MC/X86/AlignedBundling/bundle-group-too-large-error.s1
-rw-r--r--test/MC/X86/AlignedBundling/different-sections.s2
-rw-r--r--test/MC/X86/AlignedBundling/labeloffset.s2
-rw-r--r--test/MC/X86/AlignedBundling/long-nop-pad.s2
-rw-r--r--test/MC/X86/AlignedBundling/nesting.s2
-rw-r--r--test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s2
-rw-r--r--test/MC/X86/AlignedBundling/pad-bundle-groups.s2
-rw-r--r--test/MC/X86/AlignedBundling/relax-at-bundle-end.s2
-rw-r--r--test/MC/X86/AlignedBundling/relax-in-bundle-group.s2
-rw-r--r--test/MC/X86/AlignedBundling/single-inst-bundling.s13
-rw-r--r--test/MC/X86/expand-var.s2
-rw-r--r--test/MC/X86/reloc-undef-global.s2
-rw-r--r--test/MC/X86/stackmap-nops.ll34
-rw-r--r--test/MC/X86/x86-16.s2
-rw-r--r--test/MC/X86/x86-32.s2
-rw-r--r--test/MC/X86/x86-64.s2
-rw-r--r--test/Other/lint.ll8
-rw-r--r--test/TableGen/AsmPredicateCondsEmission.td31
-rw-r--r--test/Transforms/ArgumentPromotion/variadic.ll2
-rw-r--r--test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll18
-rw-r--r--test/Transforms/CodeGenPrepare/overflow-intrinsics.ll74
-rw-r--r--test/Transforms/CodeGenPrepare/statepoint-relocate.ll24
-rw-r--r--test/Transforms/ConstantHoisting/X86/stackmap.ll4
-rw-r--r--test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll2
-rw-r--r--test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll2
-rw-r--r--test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll2
-rw-r--r--test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll20
-rw-r--r--test/Transforms/DeadArgElim/2013-05-17-VarargsAndBlockAddress.ll2
-rw-r--r--test/Transforms/DeadArgElim/dbginfo.ll2
-rw-r--r--test/Transforms/DeadArgElim/dead_vaargs.ll14
-rw-r--r--test/Transforms/DeadArgElim/variadic_safety.ll8
-rw-r--r--test/Transforms/DeadStoreElimination/inst-limits.ll2
-rw-r--r--test/Transforms/DeadStoreElimination/simple.ll4
-rw-r--r--test/Transforms/Float2Int/basic.ll256
-rw-r--r--test/Transforms/Float2Int/float2int-optnone.ll17
-rw-r--r--test/Transforms/Float2Int/toolarge.ll16
-rw-r--r--test/Transforms/FunctionAttrs/nocapture.ll2
-rw-r--r--test/Transforms/FunctionAttrs/readattrs.ll2
-rw-r--r--test/Transforms/GCOVProfiling/linkagename.ll2
-rw-r--r--test/Transforms/GCOVProfiling/return-block.ll4
-rw-r--r--test/Transforms/GVN/2007-07-31-NoDomInherit.ll10
-rw-r--r--test/Transforms/GVN/2008-02-12-UndefLoad.ll2
-rw-r--r--test/Transforms/GVN/2009-02-17-LoadPRECrash.ll2
-rw-r--r--test/Transforms/GVN/invariant-load.ll2
-rw-r--r--test/Transforms/GVN/pre-basic-add.ll2
-rw-r--r--test/Transforms/GVN/pre-compare.ll2
-rw-r--r--test/Transforms/GVN/rle-must-alias.ll6
-rw-r--r--test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll2
-rw-r--r--test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll2
-rw-r--r--test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll4
-rw-r--r--test/Transforms/GlobalOpt/blockaddress.ll3
-rw-r--r--test/Transforms/GlobalOpt/constantexpr-dangle.ll2
-rw-r--r--test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll12
-rw-r--r--test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll2
-rw-r--r--test/Transforms/IndVarSimplify/eliminate-max.ll2
-rw-r--r--test/Transforms/IndVarSimplify/lftr-udiv-tripcount.ll29
-rw-r--r--test/Transforms/IndVarSimplify/sink-alloca.ll4
-rw-r--r--test/Transforms/IndVarSimplify/udiv.ll2
-rw-r--r--test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll2
-rw-r--r--test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll2
-rw-r--r--test/Transforms/Inline/alloca-dbgdeclare.ll2
-rw-r--r--test/Transforms/Inline/attributes.ll50
-rw-r--r--test/Transforms/Inline/devirtualize-2.ll2
-rw-r--r--test/Transforms/Inline/frameescape.ll44
-rw-r--r--test/Transforms/Inline/ignore-debug-info.ll14
-rw-r--r--test/Transforms/Inline/inline-musttail-varargs.ll6
-rw-r--r--test/Transforms/Inline/inline_dbg_declare.ll2
-rw-r--r--test/Transforms/Inline/inline_ssp.ll8
-rw-r--r--test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll2
-rw-r--r--test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll2
-rw-r--r--test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll30
-rw-r--r--test/Transforms/InstCombine/2007-02-07-PointerCast.ll2
-rw-r--r--test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll2
-rw-r--r--test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll2
-rw-r--r--test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll2
-rw-r--r--test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll2
-rw-r--r--test/Transforms/InstCombine/2008-05-08-StrLenSink.ll2
-rw-r--r--test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll16
-rw-r--r--test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll2
-rw-r--r--test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll32
-rw-r--r--test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll2
-rw-r--r--test/Transforms/InstCombine/2012-02-13-FCmp.ll4
-rw-r--r--test/Transforms/InstCombine/alloca.ll12
-rw-r--r--test/Transforms/InstCombine/call-intrinsics.ll12
-rw-r--r--test/Transforms/InstCombine/call.ll2
-rw-r--r--test/Transforms/InstCombine/call2.ll2
-rw-r--r--test/Transforms/InstCombine/cast.ll4
-rw-r--r--test/Transforms/InstCombine/debug-line.ll4
-rw-r--r--test/Transforms/InstCombine/debuginfo.ll2
-rw-r--r--test/Transforms/InstCombine/err-rep-cold.ll4
-rw-r--r--test/Transforms/InstCombine/fprintf-1.ll22
-rw-r--r--test/Transforms/InstCombine/gc.relocate.ll6
-rw-r--r--test/Transforms/InstCombine/getelementptr.ll6
-rw-r--r--test/Transforms/InstCombine/intrinsics.ll10
-rw-r--r--test/Transforms/InstCombine/objsize-noverify.ll43
-rw-r--r--test/Transforms/InstCombine/objsize.ll37
-rw-r--r--test/Transforms/InstCombine/osx-names.ll4
-rw-r--r--test/Transforms/InstCombine/overflow.ll33
-rw-r--r--test/Transforms/InstCombine/pr2996.ll2
-rw-r--r--test/Transforms/InstCombine/pr8547.ll2
-rw-r--r--test/Transforms/InstCombine/printf-1.ll28
-rw-r--r--test/Transforms/InstCombine/printf-2.ll6
-rw-r--r--test/Transforms/InstCombine/select-crash-noverify.ll19
-rw-r--r--test/Transforms/InstCombine/select-crash.ll17
-rw-r--r--test/Transforms/InstCombine/select.ll6
-rw-r--r--test/Transforms/InstCombine/simplify-libcalls.ll2
-rw-r--r--test/Transforms/InstCombine/sprintf-1.ll22
-rw-r--r--test/Transforms/InstCombine/sqrt.ll2
-rw-r--r--test/Transforms/InstCombine/srem1.ll2
-rw-r--r--test/Transforms/InstCombine/statepoint.ll16
-rw-r--r--test/Transforms/InstCombine/urem-simplify-bug.ll2
-rw-r--r--test/Transforms/InstCombine/vec_demanded_elts.ll12
-rw-r--r--test/Transforms/InstCombine/vec_shuffle.ll8
-rw-r--r--test/Transforms/InstCombine/x86-insertps.ll117
-rw-r--r--test/Transforms/InstSimplify/undef.ll2
-rw-r--r--test/Transforms/JumpThreading/2010-08-26-and.ll2
-rw-r--r--test/Transforms/JumpThreading/assume.ll8
-rw-r--r--test/Transforms/JumpThreading/indirectbr.ll6
-rw-r--r--test/Transforms/JumpThreading/thread-loads.ll8
-rw-r--r--test/Transforms/LICM/2011-04-09-RAUW-AST.ll2
-rw-r--r--test/Transforms/LoopIdiom/debug-line.ll2
-rw-r--r--test/Transforms/LoopInterchange/interchange.ll2
-rw-r--r--test/Transforms/LoopRotate/PhiRename-1.ll2
-rw-r--r--test/Transforms/LoopRotate/dbgvalue.ll2
-rw-r--r--test/Transforms/LoopSimplify/dup-preds.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll14
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll4
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/pr17473.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/different-type-ivs.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/pr12018.ll4
-rw-r--r--test/Transforms/LoopStrengthReduce/pr18165.ll2
-rw-r--r--test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll27
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop.ll2
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop4.ll3
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop5.ll45
-rw-r--r--test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll2
-rw-r--r--test/Transforms/LoopVectorize/dbg.value.ll2
-rw-r--r--test/Transforms/LoopVectorize/debugloc.ll2
-rw-r--r--test/Transforms/LowerExpectIntrinsic/basic.ll14
-rw-r--r--test/Transforms/LowerInvoke/2003-12-10-Crash.ll3
-rw-r--r--test/Transforms/Mem2Reg/ConvertDebugInfo.ll2
-rw-r--r--test/Transforms/Mem2Reg/ConvertDebugInfo2.ll2
-rw-r--r--test/Transforms/MemCpyOpt/form-memset.ll2
-rw-r--r--test/Transforms/NaryReassociate/nary-add.ll198
-rw-r--r--test/Transforms/ObjCARC/basic.ll44
-rw-r--r--test/Transforms/ObjCARC/contract.ll2
-rw-r--r--test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll14
-rw-r--r--test/Transforms/ObjCARC/intrinsic-use-isolated.ll2
-rw-r--r--test/Transforms/ObjCARC/intrinsic-use.ll16
-rw-r--r--test/Transforms/ObjCARC/move-and-merge-autorelease.ll2
-rw-r--r--test/Transforms/PhaseOrdering/PR6627.ll4
-rw-r--r--test/Transforms/PlaceSafepoints/basic.ll2
-rw-r--r--test/Transforms/Reassociate/looptest.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers.ll10
-rw-r--r--test/Transforms/RewriteStatepointsForGC/basics.ll16
-rw-r--r--test/Transforms/RewriteStatepointsForGC/live-vector.ll87
-rw-r--r--test/Transforms/RewriteStatepointsForGC/liveness-basics.ll158
-rw-r--r--test/Transforms/RewriteStatepointsForGC/preprocess.ll65
-rw-r--r--test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/relocation.ll34
-rw-r--r--test/Transforms/SCCP/2009-05-27-VectorOperandZero.ll2
-rw-r--r--test/Transforms/SCCP/retvalue-undef.ll2
-rw-r--r--test/Transforms/SLPVectorizer/X86/barriercall.ll2
-rw-r--r--test/Transforms/SLPVectorizer/X86/compare-reduce.ll2
-rw-r--r--test/Transforms/SLPVectorizer/X86/cross_block_slp.ll2
-rw-r--r--test/Transforms/SLPVectorizer/X86/debug_info.ll2
-rw-r--r--test/Transforms/SLPVectorizer/X86/in-tree-user.ll2
-rw-r--r--test/Transforms/SLPVectorizer/X86/multi_block.ll2
-rw-r--r--test/Transforms/SLPVectorizer/X86/pr16628.ll2
-rw-r--r--test/Transforms/SROA/ppcf128-no-fold.ll36
-rw-r--r--test/Transforms/SampleProfile/branch.ll4
-rw-r--r--test/Transforms/SampleProfile/calls.ll2
-rw-r--r--test/Transforms/SampleProfile/fnptr.ll2
-rw-r--r--test/Transforms/SampleProfile/propagate.ll2
-rw-r--r--test/Transforms/ScalarRepl/debuginfo-preserved.ll2
-rw-r--r--test/Transforms/ScalarRepl/inline-vector.ll2
-rw-r--r--test/Transforms/ScalarRepl/only-memcpy-uses.ll2
-rw-r--r--test/Transforms/ScalarRepl/phi-cycle.ll4
-rw-r--r--test/Transforms/Scalarizer/dbginfo.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2007-12-21-Crash.ll4
-rw-r--r--test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll2
-rw-r--r--test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll8
-rw-r--r--test/Transforms/SimplifyCFG/UnreachableEliminate.ll4
-rw-r--r--test/Transforms/SimplifyCFG/common-dest-folding.ll4
-rw-r--r--test/Transforms/SimplifyCFG/hoist-dbgvalue.ll6
-rw-r--r--test/Transforms/SimplifyCFG/trap-debugloc.ll2
-rw-r--r--test/Transforms/SimplifyCFG/volatile-phioper.ll4
-rw-r--r--test/Transforms/StraightLineStrengthReduce/X86/no-slsr.ll18
-rw-r--r--test/Transforms/StraightLineStrengthReduce/slsr-add.ll101
-rw-r--r--test/Transforms/StraightLineStrengthReduce/slsr-gep.ll113
-rw-r--r--test/Transforms/StraightLineStrengthReduce/slsr-mul.ll92
-rw-r--r--test/Transforms/StripSymbols/2010-08-25-crash.ll2
-rw-r--r--test/Transforms/StripSymbols/strip-dead-debug-info.ll2
-rw-r--r--test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll2
-rw-r--r--test/Verifier/2008-01-11-VarargAttrs.ll2
-rw-r--r--test/Verifier/dbg-typerefs.ll32
-rw-r--r--test/Verifier/frameescape.ll10
-rw-r--r--test/Verifier/inalloca-vararg.ll2
-rw-r--r--test/Verifier/invalid-statepoint.ll2
-rw-r--r--test/Verifier/invalid-statepoint2.ll2
-rw-r--r--test/Verifier/llvm.dbg.declare-address.ll3
-rw-r--r--test/Verifier/llvm.dbg.declare-expression.ll3
-rw-r--r--test/Verifier/llvm.dbg.declare-variable.ll3
-rw-r--r--test/Verifier/llvm.dbg.intrinsic-dbg-attachment.ll61
-rw-r--r--test/Verifier/llvm.dbg.value-expression.ll3
-rw-r--r--test/Verifier/llvm.dbg.value-value.ll3
-rw-r--r--test/Verifier/llvm.dbg.value-variable.ll3
-rw-r--r--test/Verifier/mdcompositetype-templateparams-tuple.ll11
-rw-r--r--test/Verifier/mdcompositetype-templateparams.ll12
-rw-r--r--test/Verifier/musttail-invalid.ll2
-rw-r--r--test/Verifier/musttail-valid.ll6
-rw-r--r--test/Verifier/statepoint.ll4
-rw-r--r--test/Verifier/varargs-intrinsic.ll2
-rw-r--r--test/tools/llvm-objdump/AArch64/Inputs/print-mrs.obj.macho-aarch64 bin 0 -> 348 bytes
-rw-r--r--test/tools/llvm-objdump/AArch64/macho-print-mrs.test3
-rwxr-xr-xtest/tools/llvm-objdump/X86/Inputs/Objc1.32bit.exe.macho-i386 bin 0 -> 26256 bytes
-rw-r--r--test/tools/llvm-objdump/X86/Inputs/Objc1.32bit.obj.macho-i386 bin 0 -> 55784 bytes
-rwxr-xr-xtest/tools/llvm-objdump/X86/Inputs/Objc2.32bit.exe.macho-i386 bin 0 -> 27908 bytes
-rw-r--r--test/tools/llvm-objdump/X86/Inputs/Objc2.32bit.obj.macho-i386 bin 0 -> 24312 bytes
-rwxr-xr-xtest/tools/llvm-objdump/X86/Inputs/Objc2.64bit.exe.macho-x86_64 bin 0 -> 22688 bytes
-rw-r--r--test/tools/llvm-objdump/X86/Inputs/Objc2.64bit.obj.macho-x86_64 bin 0 -> 13580 bytes
-rw-r--r--test/tools/llvm-objdump/X86/macho-objc-meta-data.test1039
-rw-r--r--test/tools/llvm-objdump/macho-sections.test5
-rw-r--r--test/tools/llvm-readobj/Inputs/macho-universal-archive.x86_64.i386 bin 0 -> 1656 bytes
-rw-r--r--test/tools/llvm-readobj/macho-universal-x86_64.i386.test156
-rw-r--r--tools/bugpoint/BugDriver.cpp2
-rw-r--r--tools/bugpoint/Miscompilation.cpp3
-rw-r--r--tools/bugpoint/OptimizerDriver.cpp9
-rw-r--r--tools/bugpoint/ToolRunner.h2
-rw-r--r--tools/gold/gold-plugin.cpp7
-rw-r--r--tools/llc/llc.cpp47
-rw-r--r--tools/lli/OrcLazyJIT.cpp124
-rw-r--r--tools/lli/OrcLazyJIT.h98
-rw-r--r--tools/lli/RemoteMemoryManager.h2
-rw-r--r--tools/lli/RemoteTargetExternal.h2
-rw-r--r--tools/lli/lli.cpp2
-rw-r--r--tools/llvm-as/llvm-as.cpp7
-rw-r--r--tools/llvm-cxxdump/llvm-cxxdump.cpp24
-rw-r--r--tools/llvm-dis/llvm-dis.cpp23
-rw-r--r--tools/llvm-extract/llvm-extract.cpp15
-rw-r--r--tools/llvm-link/llvm-link.cpp28
-rw-r--r--tools/llvm-mc/llvm-mc.cpp22
-rw-r--r--tools/llvm-objdump/MachODump.cpp3021
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp5
-rw-r--r--tools/llvm-objdump/llvm-objdump.h3
-rw-r--r--tools/llvm-readobj/llvm-readobj.cpp13
-rw-r--r--tools/llvm-rtdyld/llvm-rtdyld.cpp21
-rw-r--r--tools/llvm-shlib/CMakeLists.txt63
-rw-r--r--tools/opt/BreakpointPrinter.cpp26
-rw-r--r--tools/opt/NewPMDriver.cpp10
-rw-r--r--tools/opt/NewPMDriver.h4
-rw-r--r--tools/opt/opt.cpp56
-rw-r--r--tools/verify-uselistorder/verify-uselistorder.cpp54
-rw-r--r--tools/yaml2obj/yaml2coff.cpp5
-rw-r--r--unittests/ADT/DAGDeltaAlgorithmTest.cpp2
-rw-r--r--unittests/ADT/DeltaAlgorithmTest.cpp2
-rw-r--r--unittests/ADT/SmallVectorTest.cpp5
-rw-r--r--unittests/Analysis/AliasAnalysisTest.cpp94
-rw-r--r--unittests/Analysis/CFGTest.cpp4
-rw-r--r--unittests/Analysis/CMakeLists.txt1
-rw-r--r--unittests/Analysis/ScalarEvolutionTest.cpp2
-rw-r--r--unittests/ExecutionEngine/ExecutionEngineTest.cpp3
-rw-r--r--unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp28
-rw-r--r--unittests/ExecutionEngine/MCJIT/MCJITObjectCacheTest.cpp4
-rw-r--r--unittests/ExecutionEngine/MCJIT/MCJITTest.cpp2
-rw-r--r--unittests/ExecutionEngine/MCJIT/MCJITTestBase.h5
-rw-r--r--unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp10
-rw-r--r--unittests/IR/ConstantsTest.cpp8
-rw-r--r--unittests/IR/DebugInfoTest.cpp128
-rw-r--r--unittests/IR/DominatorTreeTest.cpp32
-rw-r--r--unittests/IR/IRBuilderTest.cpp15
-rw-r--r--unittests/IR/MetadataTest.cpp526
-rw-r--r--unittests/IR/ValueHandleTest.cpp24
-rw-r--r--unittests/IR/ValueMapTest.cpp3
-rw-r--r--unittests/LineEditor/LineEditor.cpp2
-rw-r--r--unittests/Linker/LinkModulesTest.cpp4
-rw-r--r--unittests/Support/AlignOfTest.cpp42
-rw-r--r--unittests/Support/CMakeLists.txt1
-rw-r--r--unittests/Support/CommandLineTest.cpp4
-rw-r--r--unittests/Support/MemoryBufferTest.cpp2
-rw-r--r--unittests/Support/Path.cpp6
-rw-r--r--unittests/Support/raw_pwrite_stream_test.cpp25
-rw-r--r--unittests/Transforms/Utils/Cloning.cpp83
-rw-r--r--utils/TableGen/AsmWriterEmitter.cpp27
-rw-r--r--utils/TableGen/AsmWriterInst.cpp11
-rw-r--r--utils/TableGen/AsmWriterInst.h10
-rw-r--r--utils/TableGen/CodeGenRegisters.cpp10
-rw-r--r--utils/TableGen/CodeGenRegisters.h1
-rw-r--r--utils/TableGen/DAGISelMatcher.h6
-rw-r--r--utils/TableGen/FixedLenDecoderEmitter.cpp4
-rw-r--r--utils/TableGen/RegisterInfoEmitter.cpp5
-rw-r--r--utils/TableGen/X86DisassemblerShared.h2
-rw-r--r--utils/TableGen/X86DisassemblerTables.cpp40
-rw-r--r--utils/create_ladder_graph.py43
-rw-r--r--utils/lit/lit/formats/googletest.py20
-rw-r--r--utils/unittest/UnitTestMain/TestMain.cpp2
-rw-r--r--utils/unittest/googletest/include/gtest/gtest-spi.h5
-rw-r--r--utils/unittest/googletest/include/gtest/gtest-test-part.h4
-rw-r--r--utils/unittest/googletest/include/gtest/gtest.h31
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-death-test-internal.h4
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-internal.h2
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-param-util.h28
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-port.h4
-rw-r--r--utils/unittest/googletest/src/gtest-death-test.cc13
-rw-r--r--utils/unittest/googletest/src/gtest-internal-inl.h8
-rw-r--r--utils/unittest/googletest/src/gtest.cc78
-rw-r--r--utils/yaml-bench/YAMLBench.cpp2
2002 files changed, 46581 insertions, 16722 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index be78ac2..aac8b52 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,6 +60,8 @@ if (NOT PACKAGE_VERSION)
"${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}")
endif()
+option(LLVM_INSTALL_UTILS "Include utility binaries in the 'install' target." OFF)
+
option(LLVM_INSTALL_TOOLCHAIN_ONLY "Only include toolchain files in the 'install' target." OFF)
option(LLVM_USE_FOLDERS "Enable solution folders in Visual Studio. Disable for Express versions." ON)
@@ -231,14 +233,7 @@ list(REMOVE_DUPLICATES LLVM_TARGETS_TO_BUILD)
include(AddLLVMDefinitions)
option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON)
-
-# MSVC has a gazillion warnings with this.
-if( MSVC )
- option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." OFF)
-else()
- option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON)
-endif()
-
+option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON)
option(LLVM_ENABLE_MODULES "Compile with C++ modules enabled." OFF)
option(LLVM_ENABLE_CXX1Y "Compile with C++1y enabled." OFF)
option(LLVM_ENABLE_LIBCXX "Use libc++ if available." OFF)
@@ -338,6 +333,7 @@ option (LLVM_BUILD_EXTERNAL_COMPILER_RT
"Build compiler-rt as an external project." OFF)
option(LLVM_BUILD_LLVM_DYLIB "Build libllvm dynamic library" OFF)
+option(LLVM_DYLIB_EXPORT_ALL "Export all symbols from libLLVM.dylib (default is C API only" OFF)
option(LLVM_DISABLE_LLVM_DYLIB_ATEXIT "Disable llvm-shlib's atexit destructors." ON)
if(LLVM_DISABLE_LLVM_DYLIB_ATEXIT)
set(DISABLE_LLVM_DYLIB_ATEXIT 1)
diff --git a/Makefile.config.in b/Makefile.config.in
index d34a2d5..a4d6410 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -293,6 +293,11 @@ ENABLE_TERMINFO = @ENABLE_TERMINFO@
#ENABLE_EXPENSIVE_CHECKS = 0
@ENABLE_EXPENSIVE_CHECKS@
+# --enable-abi-breaking-checks : decide whether we should compile in asserts and
+# checks that make the build ABI incompatible with an llvm built without these
+# checks enabled.
+ENABLE_ABI_BREAKING_CHECKS = @ENABLE_ABI_BREAKING_CHECKS@
+
# When DEBUG_RUNTIME is enabled, the runtime libraries will retain debug
# symbols.
#DEBUG_RUNTIME = 1
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 9617543..10b95a0 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -736,9 +736,17 @@ AC_ARG_ENABLE(abi-breaking-checks,AS_HELP_STRING(
case "$enableval" in
with-asserts) if test ${assertions_enabled} = "yes" ; then
AC_DEFINE([LLVM_ENABLE_ABI_BREAKING_CHECKS],[1],[Define to enable checks that alter the LLVM C++ ABI])
+ AC_SUBST(ENABLE_ABI_BREAKING_CHECKS,[1])
+ else
+ AC_SUBST(ENABLE_ABI_BREAKING_CHECKS,[0])
fi ;;
- yes) AC_DEFINE([LLVM_ENABLE_ABI_BREAKING_CHECKS],[1],[Define to enable checks that alter the LLVM C++ ABI]) ;;
- no) ;;
+ yes)
+ AC_DEFINE([LLVM_ENABLE_ABI_BREAKING_CHECKS],[1],[Define to enable checks that alter the LLVM C++ ABI])
+ AC_SUBST(ENABLE_ABI_BREAKING_CHECKS,[1])
+ ;;
+ no)
+ AC_SUBST(ENABLE_ABI_BREAKING_CHECKS,[0])
+ ;;
*) AC_MSG_ERROR([Invalid setting for --enable-abi-breaking-checks. Use "with-asserts", "yes" or "no"])
esac
diff --git a/bindings/go/llvm/DIBuilderBindings.cpp b/bindings/go/llvm/DIBuilderBindings.cpp
index f39198d..cfae605 100644
--- a/bindings/go/llvm/DIBuilderBindings.cpp
+++ b/bindings/go/llvm/DIBuilderBindings.cpp
@@ -21,12 +21,6 @@ using namespace llvm;
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DIBuilder, LLVMDIBuilderRef)
-namespace {
-template <typename T> T unwrapDI(LLVMMetadataRef v) {
- return v ? T(unwrap<MDNode>(v)) : T();
-}
-}
-
LLVMDIBuilderRef LLVMNewDIBuilder(LLVMModuleRef mref) {
Module *m = unwrap(mref);
return wrap(new DIBuilder(*m));
@@ -64,8 +58,8 @@ LLVMMetadataRef LLVMDIBuilderCreateLexicalBlock(LLVMDIBuilderRef Dref,
unsigned Line,
unsigned Column) {
DIBuilder *D = unwrap(Dref);
- DILexicalBlock LB = D->createLexicalBlock(
- unwrapDI<DIDescriptor>(Scope), unwrapDI<DIFile>(File), Line, Column);
+ auto *LB = D->createLexicalBlock(unwrap<MDLocalScope>(Scope),
+ unwrap<MDFile>(File), Line, Column);
return wrap(LB);
}
@@ -75,7 +69,7 @@ LLVMMetadataRef LLVMDIBuilderCreateLexicalBlockFile(LLVMDIBuilderRef Dref,
unsigned Discriminator) {
DIBuilder *D = unwrap(Dref);
DILexicalBlockFile LBF = D->createLexicalBlockFile(
- unwrapDI<DIDescriptor>(Scope), unwrapDI<DIFile>(File), Discriminator);
+ unwrap<MDLocalScope>(Scope), unwrap<MDFile>(File), Discriminator);
return wrap(LBF);
}
@@ -86,9 +80,10 @@ LLVMMetadataRef LLVMDIBuilderCreateFunction(
unsigned ScopeLine, unsigned Flags, int IsOptimized, LLVMValueRef Func) {
DIBuilder *D = unwrap(Dref);
DISubprogram SP = D->createFunction(
- unwrapDI<DIDescriptor>(Scope), Name, LinkageName, unwrapDI<DIFile>(File),
- Line, unwrapDI<DICompositeType>(CompositeType), IsLocalToUnit,
- IsDefinition, ScopeLine, Flags, IsOptimized, unwrap<Function>(Func));
+ unwrap<MDScope>(Scope), Name, LinkageName,
+ File ? unwrap<MDFile>(File) : nullptr, Line,
+ unwrap<MDSubroutineType>(CompositeType), IsLocalToUnit, IsDefinition,
+ ScopeLine, Flags, IsOptimized, unwrap<Function>(Func));
return wrap(SP);
}
@@ -98,8 +93,8 @@ LLVMMetadataRef LLVMDIBuilderCreateLocalVariable(
int AlwaysPreserve, unsigned Flags, unsigned ArgNo) {
DIBuilder *D = unwrap(Dref);
DIVariable V = D->createLocalVariable(
- Tag, unwrapDI<DIDescriptor>(Scope), Name, unwrapDI<DIFile>(File), Line,
- unwrapDI<DIType>(Ty), AlwaysPreserve, Flags, ArgNo);
+ Tag, unwrap<MDScope>(Scope), Name, unwrap<MDFile>(File), Line,
+ unwrap<MDType>(Ty), AlwaysPreserve, Flags, ArgNo);
return wrap(V);
}
@@ -119,7 +114,7 @@ LLVMMetadataRef LLVMDIBuilderCreatePointerType(LLVMDIBuilderRef Dref,
uint64_t AlignInBits,
const char *Name) {
DIBuilder *D = unwrap(Dref);
- DIDerivedType T = D->createPointerType(unwrapDI<DIType>(PointeeType),
+ DIDerivedType T = D->createPointerType(unwrap<MDType>(PointeeType),
SizeInBits, AlignInBits, Name);
return wrap(T);
}
@@ -128,8 +123,9 @@ LLVMMetadataRef
LLVMDIBuilderCreateSubroutineType(LLVMDIBuilderRef Dref, LLVMMetadataRef File,
LLVMMetadataRef ParameterTypes) {
DIBuilder *D = unwrap(Dref);
- DICompositeType CT = D->createSubroutineType(
- unwrapDI<DIFile>(File), unwrapDI<DITypeArray>(ParameterTypes));
+ DICompositeType CT =
+ D->createSubroutineType(File ? unwrap<MDFile>(File) : nullptr,
+ DITypeArray(unwrap<MDTuple>(ParameterTypes)));
return wrap(CT);
}
@@ -140,9 +136,10 @@ LLVMMetadataRef LLVMDIBuilderCreateStructType(
LLVMMetadataRef ElementTypes) {
DIBuilder *D = unwrap(Dref);
DICompositeType CT = D->createStructType(
- unwrapDI<DIDescriptor>(Scope), Name, unwrapDI<DIFile>(File), Line,
- SizeInBits, AlignInBits, Flags, unwrapDI<DIType>(DerivedFrom),
- unwrapDI<DIArray>(ElementTypes));
+ unwrap<MDScope>(Scope), Name, File ? unwrap<MDFile>(File) : nullptr, Line,
+ SizeInBits, AlignInBits, Flags,
+ DerivedFrom ? unwrap<MDType>(DerivedFrom) : nullptr,
+ ElementTypes ? DIArray(unwrap<MDTuple>(ElementTypes)) : nullptr);
return wrap(CT);
}
@@ -153,8 +150,8 @@ LLVMMetadataRef LLVMDIBuilderCreateReplaceableCompositeType(
unsigned Flags) {
DIBuilder *D = unwrap(Dref);
DICompositeType CT = D->createReplaceableCompositeType(
- Tag, Name, unwrapDI<DIDescriptor>(Scope), unwrapDI<DIFile>(File), Line,
- RuntimeLang, SizeInBits, AlignInBits, Flags);
+ Tag, Name, unwrap<MDScope>(Scope), File ? unwrap<MDFile>(File) : nullptr,
+ Line, RuntimeLang, SizeInBits, AlignInBits, Flags);
return wrap(CT);
}
@@ -166,8 +163,8 @@ LLVMDIBuilderCreateMemberType(LLVMDIBuilderRef Dref, LLVMMetadataRef Scope,
unsigned Flags, LLVMMetadataRef Ty) {
DIBuilder *D = unwrap(Dref);
DIDerivedType DT = D->createMemberType(
- unwrapDI<DIDescriptor>(Scope), Name, unwrapDI<DIFile>(File), Line,
- SizeInBits, AlignInBits, OffsetInBits, Flags, unwrapDI<DIType>(Ty));
+ unwrap<MDScope>(Scope), Name, File ? unwrap<MDFile>(File) : nullptr, Line,
+ SizeInBits, AlignInBits, OffsetInBits, Flags, unwrap<MDType>(Ty));
return wrap(DT);
}
@@ -178,8 +175,8 @@ LLVMMetadataRef LLVMDIBuilderCreateArrayType(LLVMDIBuilderRef Dref,
LLVMMetadataRef Subscripts) {
DIBuilder *D = unwrap(Dref);
DICompositeType CT =
- D->createArrayType(SizeInBits, AlignInBits, unwrapDI<DIType>(ElementType),
- unwrapDI<DIArray>(Subscripts));
+ D->createArrayType(SizeInBits, AlignInBits, unwrap<MDType>(ElementType),
+ DIArray(unwrap<MDTuple>(Subscripts)));
return wrap(CT);
}
@@ -188,9 +185,9 @@ LLVMMetadataRef LLVMDIBuilderCreateTypedef(LLVMDIBuilderRef Dref,
LLVMMetadataRef File, unsigned Line,
LLVMMetadataRef Context) {
DIBuilder *D = unwrap(Dref);
- DIDerivedType DT =
- D->createTypedef(unwrapDI<DIType>(Ty), Name, unwrapDI<DIFile>(File), Line,
- unwrapDI<DIDescriptor>(Context));
+ DIDerivedType DT = D->createTypedef(
+ unwrap<MDType>(Ty), Name, File ? unwrap<MDFile>(File) : nullptr, Line,
+ Context ? unwrap<MDScope>(Context) : nullptr);
return wrap(DT);
}
@@ -208,7 +205,7 @@ LLVMMetadataRef LLVMDIBuilderGetOrCreateArray(LLVMDIBuilderRef Dref,
Metadata **DataValue = unwrap(Data);
ArrayRef<Metadata *> Elements(DataValue, Length);
DIArray A = D->getOrCreateArray(Elements);
- return wrap(A);
+ return wrap(A.get());
}
LLVMMetadataRef LLVMDIBuilderGetOrCreateTypeArray(LLVMDIBuilderRef Dref,
@@ -218,7 +215,7 @@ LLVMMetadataRef LLVMDIBuilderGetOrCreateTypeArray(LLVMDIBuilderRef Dref,
Metadata **DataValue = unwrap(Data);
ArrayRef<Metadata *> Elements(DataValue, Length);
DITypeArray A = D->getOrCreateTypeArray(Elements);
- return wrap(A);
+ return wrap(A.get());
}
LLVMMetadataRef LLVMDIBuilderCreateExpression(LLVMDIBuilderRef Dref,
@@ -233,10 +230,14 @@ LLVMValueRef LLVMDIBuilderInsertDeclareAtEnd(LLVMDIBuilderRef Dref,
LLVMMetadataRef VarInfo,
LLVMMetadataRef Expr,
LLVMBasicBlockRef Block) {
+ // Fail immediately here until the llgo folks update their bindings. The
+ // called function is going to assert out anyway.
+ llvm_unreachable("DIBuilder API change requires a DebugLoc");
+
DIBuilder *D = unwrap(Dref);
- Instruction *Instr =
- D->insertDeclare(unwrap(Storage), unwrapDI<DIVariable>(VarInfo),
- unwrapDI<DIExpression>(Expr), unwrap(Block));
+ Instruction *Instr = D->insertDeclare(
+ unwrap(Storage), unwrap<MDLocalVariable>(VarInfo),
+ unwrap<MDExpression>(Expr), /* DebugLoc */ nullptr, unwrap(Block));
return wrap(Instr);
}
@@ -245,9 +246,13 @@ LLVMValueRef LLVMDIBuilderInsertValueAtEnd(LLVMDIBuilderRef Dref,
LLVMMetadataRef VarInfo,
LLVMMetadataRef Expr,
LLVMBasicBlockRef Block) {
+ // Fail immediately here until the llgo folks update their bindings. The
+ // called function is going to assert out anyway.
+ llvm_unreachable("DIBuilder API change requires a DebugLoc");
+
DIBuilder *D = unwrap(Dref);
Instruction *Instr = D->insertDbgValueIntrinsic(
- unwrap(Val), Offset, unwrapDI<DIVariable>(VarInfo),
- unwrapDI<DIExpression>(Expr), unwrap(Block));
+ unwrap(Val), Offset, unwrap<MDLocalVariable>(VarInfo),
+ unwrap<MDExpression>(Expr), /* DebugLoc */ nullptr, unwrap(Block));
return wrap(Instr);
}
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index 50cee30..b3fccb0 100644
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -158,33 +158,38 @@ if(NOT WIN32 AND NOT APPLE)
endif()
function(add_link_opts target_name)
- # Pass -O3 to the linker. This enabled different optimizations on different
- # linkers.
- if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin" OR WIN32))
- set_property(TARGET ${target_name} APPEND_STRING PROPERTY
- LINK_FLAGS " -Wl,-O3")
- endif()
-
- if(LLVM_LINKER_IS_GOLD)
- # With gold gc-sections is always safe.
- set_property(TARGET ${target_name} APPEND_STRING PROPERTY
- LINK_FLAGS " -Wl,--gc-sections")
- # Note that there is a bug with -Wl,--icf=safe so it is not safe
- # to enable. See https://sourceware.org/bugzilla/show_bug.cgi?id=17704.
- endif()
+ # Don't use linker optimizations in debug builds since it slows down the
+ # linker in a context where the optimizations are not important.
+ if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
- if(NOT LLVM_NO_DEAD_STRIP)
- if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
- # ld64's implementation of -dead_strip breaks tools that use plugins.
+ # Pass -O3 to the linker. This enables different optimizations on different
+ # linkers.
+ if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin" OR WIN32))
set_property(TARGET ${target_name} APPEND_STRING PROPERTY
- LINK_FLAGS " -Wl,-dead_strip")
- elseif(NOT WIN32 AND NOT LLVM_LINKER_IS_GOLD)
- # Object files are compiled with -ffunction-data-sections.
- # Versions of bfd ld < 2.23.1 have a bug in --gc-sections that breaks
- # tools that use plugins. Always pass --gc-sections once we require
- # a newer linker.
+ LINK_FLAGS " -Wl,-O3")
+ endif()
+
+ if(LLVM_LINKER_IS_GOLD)
+ # With gold gc-sections is always safe.
set_property(TARGET ${target_name} APPEND_STRING PROPERTY
LINK_FLAGS " -Wl,--gc-sections")
+ # Note that there is a bug with -Wl,--icf=safe so it is not safe
+ # to enable. See https://sourceware.org/bugzilla/show_bug.cgi?id=17704.
+ endif()
+
+ if(NOT LLVM_NO_DEAD_STRIP)
+ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+ # ld64's implementation of -dead_strip breaks tools that use plugins.
+ set_property(TARGET ${target_name} APPEND_STRING PROPERTY
+ LINK_FLAGS " -Wl,-dead_strip")
+ elseif(NOT WIN32 AND NOT LLVM_LINKER_IS_GOLD)
+ # Object files are compiled with -ffunction-data-sections.
+ # Versions of bfd ld < 2.23.1 have a bug in --gc-sections that breaks
+ # tools that use plugins. Always pass --gc-sections once we require
+ # a newer linker.
+ set_property(TARGET ${target_name} APPEND_STRING PROPERTY
+ LINK_FLAGS " -Wl,--gc-sections")
+ endif()
endif()
endif()
endfunction(add_link_opts)
@@ -402,6 +407,11 @@ function(llvm_add_library name)
endfunction()
macro(add_llvm_library name)
+ cmake_parse_arguments(ARG
+ "SHARED"
+ ""
+ ""
+ ${ARGN})
if( BUILD_SHARED_LIBS )
llvm_add_library(${name} SHARED ${ARGN})
else()
@@ -413,12 +423,20 @@ macro(add_llvm_library name)
set_target_properties( ${name} PROPERTIES EXCLUDE_FROM_ALL ON)
else()
if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY OR ${name} STREQUAL "LTO")
+ if(ARG_SHARED OR BUILD_SHARED_LIBS)
+ if(WIN32 OR CYGWIN)
+ set(install_type RUNTIME)
+ else()
+ set(install_type LIBRARY)
+ endif()
+ else()
+ set(install_type ARCHIVE)
+ endif()
+
install(TARGETS ${name}
- EXPORT LLVMExports
- RUNTIME DESTINATION bin
- LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
- ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}
- COMPONENT ${name})
+ EXPORT LLVMExports
+ ${install_type} DESTINATION lib${LLVM_LIBDIR_SUFFIX}
+ COMPONENT ${name})
if (NOT CMAKE_CONFIGURATION_TYPES)
add_custom_target(install-${name}
@@ -547,6 +565,18 @@ endmacro(add_llvm_example name)
macro(add_llvm_utility name)
add_llvm_executable(${name} ${ARGN})
set_target_properties(${name} PROPERTIES FOLDER "Utils")
+ if( LLVM_INSTALL_UTILS )
+ install (TARGETS ${name}
+ RUNTIME DESTINATION bin
+ COMPONENT ${name})
+ if (NOT CMAKE_CONFIGURATION_TYPES)
+ add_custom_target(install-${name}
+ DEPENDS ${name}
+ COMMAND "${CMAKE_COMMAND}"
+ -DCMAKE_INSTALL_COMPONENT=${name}
+ -P "${CMAKE_BINARY_DIR}/cmake_install.cmake")
+ endif()
+ endif()
endmacro(add_llvm_utility name)
@@ -791,8 +821,13 @@ function(add_lit_testsuites project directory)
if (NOT CMAKE_CONFIGURATION_TYPES)
parse_arguments(ARG "PARAMS;DEPENDS;ARGS" "" ${ARGN})
file(GLOB_RECURSE litCfg ${directory}/lit*.cfg)
+ set(lit_suites)
foreach(f ${litCfg})
get_filename_component(dir ${f} DIRECTORY)
+ set(lit_suites ${lit_suites} ${dir})
+ endforeach()
+ list(REMOVE_DUPLICATES lit_suites)
+ foreach(dir ${lit_suites})
string(REPLACE ${directory} "" name_slash ${dir})
if (name_slash)
string(REPLACE "/" "-" name_slash ${name_slash})
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index 67f86a6..27147dc 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -315,6 +315,13 @@ if( MSVC )
# Enable warnings
if (LLVM_ENABLE_WARNINGS)
append("/W4" msvc_warning_flags)
+ # CMake appends /W3 by default, and having /W3 followed by /W4 will result in
+ # cl : Command line warning D9025 : overriding '/W3' with '/W4'. Since this is
+ # a command line warning and not a compiler warning, it cannot be suppressed except
+ # by fixing the command line.
+ string(REGEX REPLACE " /W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+ string(REGEX REPLACE " /W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+
if (LLVM_ENABLE_PEDANTIC)
# No MSVC equivalent available
endif (LLVM_ENABLE_PEDANTIC)
@@ -486,7 +493,8 @@ endif()
# But MinSizeRel seems to add that automatically, so maybe disable these
# flags instead if LLVM_NO_DEAD_STRIP is set.
if(NOT CYGWIN AND NOT WIN32)
- if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+ if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" AND
+ NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
check_c_compiler_flag("-Werror -fno-function-sections" C_SUPPORTS_FNO_FUNCTION_SECTIONS)
if (C_SUPPORTS_FNO_FUNCTION_SECTIONS)
# Don't add -ffunction-section if it can be disabled with -fno-function-sections.
diff --git a/cmake/modules/LLVMConfig.cmake.in b/cmake/modules/LLVMConfig.cmake.in
index 9a9cd85..5b7789d 100644
--- a/cmake/modules/LLVMConfig.cmake.in
+++ b/cmake/modules/LLVMConfig.cmake.in
@@ -21,6 +21,8 @@ set(LLVM_TARGETS_WITH_JIT @LLVM_TARGETS_WITH_JIT@)
set(TARGET_TRIPLE "@TARGET_TRIPLE@")
+set(LLVM_ABI_BREAKING_CHECKS @LLVM_ABI_BREAKING_CHECKS@)
+
set(LLVM_ENABLE_ASSERTIONS @LLVM_ENABLE_ASSERTIONS@)
set(LLVM_ENABLE_EH @LLVM_ENABLE_EH@)
diff --git a/cmake/modules/Makefile b/cmake/modules/Makefile
index e38f5a6..97ee7d3 100644
--- a/cmake/modules/Makefile
+++ b/cmake/modules/Makefile
@@ -21,6 +21,12 @@ else
LLVM_ENABLE_ASSERTIONS := 1
endif
+ifeq ($(ENABLE_ABI_BREAKING_CHECKS),1)
+ LLVM_ABI_BREAKING_CHECKS := FORCE_ON
+else
+ LLVM_ABI_BREAKING_CHECKS := FORCE_OFF
+endif
+
ifeq ($(REQUIRES_EH),1)
LLVM_ENABLE_EH := 1
else
@@ -63,6 +69,7 @@ $(PROJ_OBJ_DIR)/LLVMConfig.cmake: LLVMConfig.cmake.in Makefile $(LLVMBuildCMakeF
-e 's/@LLVM_TARGETS_TO_BUILD@/'"$(TARGETS_TO_BUILD)"'/' \
-e 's/@LLVM_TARGETS_WITH_JIT@/'"$(TARGETS_WITH_JIT)"'/' \
-e 's/@TARGET_TRIPLE@/'"$(TARGET_TRIPLE)"'/' \
+ -e 's/@LLVM_ABI_BREAKING_CHECKS@/'"$(LLVM_ABI_BREAKING_CHECKS)"'/' \
-e 's/@LLVM_ENABLE_ASSERTIONS@/'"$(LLVM_ENABLE_ASSERTIONS)"'/' \
-e 's/@LLVM_ENABLE_EH@/'"$(LLVM_ENABLE_EH)"'/' \
-e 's/@LLVM_ENABLE_RTTI@/'"$(LLVM_ENABLE_RTTI)"'/' \
diff --git a/configure b/configure
index 4ad1f21..7ddb0f3 100755
--- a/configure
+++ b/configure
@@ -697,6 +697,7 @@ DISABLE_ASSERTIONS
ENABLE_WERROR
ENABLE_EXPENSIVE_CHECKS
EXPENSIVE_CHECKS
+ENABLE_ABI_BREAKING_CHECKS
DEBUG_RUNTIME
DEBUG_SYMBOLS
KEEP_SYMBOLS
@@ -5042,13 +5043,25 @@ cat >>confdefs.h <<\_ACEOF
#define LLVM_ENABLE_ABI_BREAKING_CHECKS 1
_ACEOF
+ ENABLE_ABI_BREAKING_CHECKS=1
+
+ else
+ ENABLE_ABI_BREAKING_CHECKS=0
+
fi ;;
yes)
+
cat >>confdefs.h <<\_ACEOF
#define LLVM_ENABLE_ABI_BREAKING_CHECKS 1
_ACEOF
- ;;
- no) ;;
+
+ ENABLE_ABI_BREAKING_CHECKS=1
+
+ ;;
+ no)
+ ENABLE_ABI_BREAKING_CHECKS=0
+
+ ;;
*) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-abi-breaking-checks. Use \"with-asserts\", \"yes\" or \"no\"" >&5
echo "$as_me: error: Invalid setting for --enable-abi-breaking-checks. Use \"with-asserts\", \"yes\" or \"no\"" >&2;}
{ (exit 1); exit 1; }; }
@@ -18835,8 +18848,8 @@ DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim
ENABLE_WERROR!$ENABLE_WERROR$ac_delim
ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim
EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim
+ENABLE_ABI_BREAKING_CHECKS!$ENABLE_ABI_BREAKING_CHECKS$ac_delim
DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim
-DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -18878,6 +18891,7 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim
KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim
JIT!$JIT$ac_delim
TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
@@ -18974,7 +18988,6 @@ HUGE_VAL_SANITY!$HUGE_VAL_SANITY$ac_delim
MMAP_FILE!$MMAP_FILE$ac_delim
SHLIBEXT!$SHLIBEXT$ac_delim
LLVM_PREFIX!$LLVM_PREFIX$ac_delim
-LLVM_BINDIR!$LLVM_BINDIR$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -19016,6 +19029,7 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+LLVM_BINDIR!$LLVM_BINDIR$ac_delim
LLVM_DATADIR!$LLVM_DATADIR$ac_delim
LLVM_DOCSDIR!$LLVM_DOCSDIR$ac_delim
LLVM_ETCDIR!$LLVM_ETCDIR$ac_delim
@@ -19035,7 +19049,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 17; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 18; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/docs/Bugpoint.rst b/docs/Bugpoint.rst
index 8fa64bc..6bd7ff9 100644
--- a/docs/Bugpoint.rst
+++ b/docs/Bugpoint.rst
@@ -208,7 +208,7 @@ point---a simple binary search may not be sufficient, as transformations that
interact may require isolating more than one call. In TargetLowering, use
``return SDNode();`` instead of ``return false;``.
-Now that that the number of transformations is down to a manageable number, try
+Now that the number of transformations is down to a manageable number, try
examining the output to see if you can figure out which transformations are
being done. If that can be figured out, then do the usual debugging. If which
code corresponds to the transformation being performed isn't obvious, set a
diff --git a/docs/ExceptionHandling.rst b/docs/ExceptionHandling.rst
index 21de19b..72ed78a 100644
--- a/docs/ExceptionHandling.rst
+++ b/docs/ExceptionHandling.rst
@@ -519,37 +519,29 @@ action.
A code of ``i32 1`` indicates a catch action, which expects three additional
arguments. Different EH schemes give different meanings to the three arguments,
-but the first argument indicates whether the catch should fire, the second is a
-pointer to stack object where the exception object should be stored, and the
-third is the code to run to catch the exception.
+but the first argument indicates whether the catch should fire, the second is
+the frameescape index of the exception object, and the third is the code to run
+to catch the exception.
For Windows C++ exception handling, the first argument for a catch handler is a
-pointer to the RTTI type descriptor for the object to catch. The third argument
-is a pointer to a function implementing the catch. This function returns the
-address of the basic block where execution should resume after handling the
-exception.
+pointer to the RTTI type descriptor for the object to catch. The second
+argument is an index into the argument list of the ``llvm.frameescape`` call in
+the main function. The exception object will be copied into the provided stack
+object. If the exception object is not required, this argument should be -1.
+The third argument is a pointer to a function implementing the catch. This
+function returns the address of the basic block where execution should resume
+after handling the exception.
For Windows SEH, the first argument is a pointer to the filter function, which
indicates if the exception should be caught or not. The second argument is
-typically null. The third argument is the address of a basic block where the
-exception will be handled. In other words, catch handlers are not outlined in
-SEH. After running cleanups, execution immediately resumes at this PC.
+typically negative one. The third argument is the address of a basic block
+where the exception will be handled. In other words, catch handlers are not
+outlined in SEH. After running cleanups, execution immediately resumes at this
+PC.
In order to preserve the structure of the CFG, a call to '``llvm.eh.actions``'
-must be followed by an ':ref:`indirectbr <i_indirectbr>`' instruction that jumps
-to the result of the intrinsic call.
-
-``llvm.eh.unwindhelp``
-----------------------
-
-.. code-block:: llvm
-
- void @llvm.eh.unwindhelp(i8*)
-
-This intrinsic designates the provided static alloca as the unwind help object.
-This object is used by Windows native exception handling on non-x86 platforms
-where xdata unwind information is used. It is typically an 8 byte chunk of
-memory treated as two 32-bit integers.
+must be followed by an ':ref:`indirectbr <i_indirectbr>`' instruction that
+jumps to the result of the intrinsic call.
SJLJ Intrinsics
diff --git a/docs/ExtendingLLVM.rst b/docs/ExtendingLLVM.rst
index 2552c07..56c48af 100644
--- a/docs/ExtendingLLVM.rst
+++ b/docs/ExtendingLLVM.rst
@@ -178,42 +178,46 @@ Adding a new instruction
to maintain compatibility with the previous version. Only add an instruction
if it is absolutely necessary.
-#. ``llvm/include/llvm/Instruction.def``:
+#. ``llvm/include/llvm/IR/Instruction.def``:
add a number for your instruction and an enum name
-#. ``llvm/include/llvm/Instructions.h``:
+#. ``llvm/include/llvm/IR/Instructions.h``:
add a definition for the class that will represent your instruction
-#. ``llvm/include/llvm/Support/InstVisitor.h``:
+#. ``llvm/include/llvm/IR/InstVisitor.h``:
add a prototype for a visitor to your new instruction type
-#. ``llvm/lib/AsmParser/Lexer.l``:
+#. ``llvm/lib/AsmParser/LLLexer.cpp``:
add a new token to parse your instruction from assembly text file
-#. ``llvm/lib/AsmParser/llvmAsmParser.y``:
+#. ``llvm/lib/AsmParser/LLParser.cpp``:
add the grammar on how your instruction can be read and what it will
construct as a result
-#. ``llvm/lib/Bitcode/Reader/Reader.cpp``:
+#. ``llvm/lib/Bitcode/Reader/BitcodeReader.cpp``:
add a case for your instruction and how it will be parsed from bitcode
-#. ``llvm/lib/VMCore/Instruction.cpp``:
+#. ``llvm/lib/Bitcode/Writer/BitcodeWriter.cpp``:
+
+ add a case for your instruction and how it will be parsed from bitcode
+
+#. ``llvm/lib/IR/Instruction.cpp``:
add a case for how your instruction will be printed out to assembly
-#. ``llvm/lib/VMCore/Instructions.cpp``:
+#. ``llvm/lib/IR/Instructions.cpp``:
implement the class you defined in ``llvm/include/llvm/Instructions.h``
#. Test your instruction
-#. ``llvm/lib/Target/*``:
+#. ``llvm/lib/Target/*``:
add support for your instruction to code generators, or add a lowering pass.
@@ -236,69 +240,88 @@ Adding a new type
Adding a fundamental type
-------------------------
-#. ``llvm/include/llvm/Type.h``:
+#. ``llvm/include/llvm/IR/Type.h``:
add enum for the new type; add static ``Type*`` for this type
-#. ``llvm/lib/VMCore/Type.cpp``:
+#. ``llvm/lib/IR/Type.cpp`` and ``llvm/lib/IR/ValueTypes.cpp``:
add mapping from ``TypeID`` => ``Type*``; initialize the static ``Type*``
-#. ``llvm/lib/AsmReader/Lexer.l``:
+#. ``llvm/llvm/llvm-c/Core.cpp``:
+
+ add enum ``LLVMTypeKind`` and modify
+ ``LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty)`` for the new type
+
+#. ``llvm/include/llvm/IR/TypeBuilder.h``:
+
+ add new class to represent new type in the hierarchy
+
+#. ``llvm/lib/AsmParser/LLLexer.cpp``:
add ability to parse in the type from text assembly
-#. ``llvm/lib/AsmReader/llvmAsmParser.y``:
+#. ``llvm/lib/AsmParser/LLParser.cpp``:
add a token for that type
+#. ``llvm/lib/Bitcode/Writer/BitcodeWriter.cpp``:
+
+ modify ``static void WriteTypeTable(const ValueEnumerator &VE,
+ BitstreamWriter &Stream)`` to serialize your type
+
+#. ``llvm/lib/Bitcode/Reader/BitcodeReader.cpp``:
+
+ modify ``bool BitcodeReader::ParseTypeType()`` to read your data type
+
+#. ``include/llvm/Bitcode/LLVMBitCodes.h``:
+
+ add enum ``TypeCodes`` for the new type
+
Adding a derived type
---------------------
-#. ``llvm/include/llvm/Type.h``:
+#. ``llvm/include/llvm/IR/Type.h``:
add enum for the new type; add a forward declaration of the type also
-#. ``llvm/include/llvm/DerivedTypes.h``:
+#. ``llvm/include/llvm/IR/DerivedTypes.h``:
add new class to represent new class in the hierarchy; add forward
declaration to the TypeMap value type
-#. ``llvm/lib/VMCore/Type.cpp``:
+#. ``llvm/lib/IR/Type.cpp`` and ``llvm/lib/IR/ValueTypes.cpp``:
- add support for derived type to:
+ add support for derived type, notably `enum TypeID` and `is`, `get` methods.
- .. code-block:: c++
+#. ``llvm/llvm/llvm-c/Core.cpp``:
- std::string getTypeDescription(const Type &Ty,
- std::vector<const Type*> &TypeStack)
- bool TypesEqual(const Type *Ty, const Type *Ty2,
- std::map<const Type*, const Type*> &EqTypes)
+ add enum ``LLVMTypeKind`` and modify
+ `LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty)` for the new type
- add necessary member functions for type, and factory methods
+#. ``llvm/include/llvm/IR/TypeBuilder.h``:
-#. ``llvm/lib/AsmReader/Lexer.l``:
+ add new class to represent new class in the hierarchy
- add ability to parse in the type from text assembly
+#. ``llvm/lib/AsmParser/LLLexer.cpp``:
-#. ``llvm/lib/Bitcode/Writer/Writer.cpp``:
+ modify ``lltok::Kind LLLexer::LexIdentifier()`` to add ability to
+ parse in the type from text assembly
- modify ``void BitcodeWriter::outputType(const Type *T)`` to serialize your
- type
+#. ``llvm/lib/Bitcode/Writer/BitcodeWriter.cpp``:
-#. ``llvm/lib/Bitcode/Reader/Reader.cpp``:
+ modify ``static void WriteTypeTable(const ValueEnumerator &VE,
+ BitstreamWriter &Stream)`` to serialize your type
- modify ``const Type *BitcodeReader::ParseType()`` to read your data type
+#. ``llvm/lib/Bitcode/Reader/BitcodeReader.cpp``:
-#. ``llvm/lib/VMCore/AsmWriter.cpp``:
+ modify ``bool BitcodeReader::ParseTypeType()`` to read your data type
- modify
+#. ``include/llvm/Bitcode/LLVMBitCodes.h``:
- .. code-block:: c++
+ add enum ``TypeCodes`` for the new type
- void calcTypeName(const Type *Ty,
- std::vector<const Type*> &TypeStack,
- std::map<const Type*,std::string> &TypeNames,
- std::string &Result)
+#. ``llvm/lib/IR/AsmWriter.cpp``:
+ modify ``void TypePrinting::print(Type *Ty, raw_ostream &OS)``
to output the new derived type
diff --git a/docs/Extensions.rst b/docs/Extensions.rst
index 271c085..c8ff07c 100644
--- a/docs/Extensions.rst
+++ b/docs/Extensions.rst
@@ -165,6 +165,29 @@ and ``.bar`` is associated to ``.foo``.
.section .foo,"bw",discard, "sym"
.section .bar,"rd",associative, "sym"
+
+ELF-Dependent
+-------------
+
+``.section`` Directive
+^^^^^^^^^^^^^^^^^^^^^^
+
+In order to support creating multiple sections with the same name and comdat,
+it is possible to add a unique number at the end of the ``.section`` directive.
+For example, the following code creates two sections named ``.text``.
+
+.. code-block:: gas
+
+ .section .text,"ax",@progbits,unique,1
+ nop
+
+ .section .text,"ax",@progbits,unique,2
+ nop
+
+
+The unique number is not present in the resulting object at all. It is just used
+in the assembler to differentiate the sections.
+
Target Specific Behaviour
=========================
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 5eaea1c..a5a8869 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -348,13 +348,13 @@ added in the future:
"``anyregcc``" - Dynamic calling convention for code patching
This is a special convention that supports patching an arbitrary code
sequence in place of a call site. This convention forces the call
- arguments into registers but allows them to be dynamcially
+ arguments into registers but allows them to be dynamically
allocated. This can currently only be used with calls to
llvm.experimental.patchpoint because only this intrinsic records
the location of its arguments in a side table. See :doc:`StackMaps`.
"``preserve_mostcc``" - The `PreserveMost` calling convention
- This calling convention attempts to make the code in the caller as little
- intrusive as possible. This calling convention behaves identical to the `C`
+ This calling convention attempts to make the code in the caller as
+ unintrusive as possible. This convention behaves identically to the `C`
calling convention on how arguments and return values are passed, but it
uses a different set of caller/callee-saved registers. This alleviates the
burden of saving and recovering a large register set before and after the
@@ -1012,6 +1012,19 @@ Currently, only the following parameter attributes are defined:
array), however ``dereferenceable(<n>)`` does imply ``nonnull`` in
``addrspace(0)`` (which is the default address space).
+``dereferenceable_or_null(<n>)``
+ This indicates that the parameter or return value isn't both
+ non-null and non-dereferenceable (up to ``<n>`` bytes) at the same
+ time. All non-null pointers tagged with
+ ``dereferenceable_or_null(<n>)`` are ``dereferenceable(<n>)``.
+ For address space 0 ``dereferenceable_or_null(<n>)`` implies that
+ a pointer is exactly one of ``dereferenceable(<n>)`` or ``null``,
+ and in other address spaces ``dereferenceable_or_null(<n>)``
+ implies that a pointer is at least one of ``dereferenceable(<n>)``
+ or ``null`` (i.e. it may be both ``null`` and
+ ``dereferenceable(<n>)``). This attribute may only be applied to
+ pointer typed parameters.
+
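+For illustration, the rule can be restated as a tiny predicate (this helper is
+purely illustrative and is not an LLVM API; ``KnownDereferenceable`` here just
+means "known to be dereferenceable up to ``<n>`` bytes")::
+
+  // Illustrative only, not an LLVM API. In address space 0 exactly one of the
+  // two cases holds; in other address spaces a null pointer may also be
+  // dereferenceable, so "at least one of" is the right reading.
+  static bool SatisfiesDereferenceableOrNull(const void *Ptr,
+                                             bool KnownDereferenceable) {
+    return Ptr == nullptr || KnownDereferenceable;
+  }
+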
.. _gc:
Garbage Collector Strategy Names
@@ -3235,21 +3248,15 @@ arguments (``DW_TAG_arg_variable``). In the latter case, the ``arg:`` field
specifies the argument position, and this variable will be included in the
``variables:`` field of its :ref:`MDSubprogram`.
-If set, the ``inlinedAt:`` field points at an :ref:`MDLocation`, and the
-variable represents an inlined version of a variable (with all other fields
-duplicated from the non-inlined version).
-
.. code-block:: llvm
!0 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 0,
scope: !3, file: !2, line: 7, type: !3,
- flags: DIFlagArtificial, inlinedAt: !4)
+ flags: DIFlagArtificial)
!1 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1,
- scope: !4, file: !2, line: 7, type: !3,
- inlinedAt: !6)
+ scope: !4, file: !2, line: 7, type: !3)
!1 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "y",
- scope: !5, file: !2, line: 7, type: !3,
- inlinedAt: !6)
+ scope: !5, file: !2, line: 7, type: !3)
MDExpression
""""""""""""
@@ -3370,7 +3377,7 @@ instructions (loads, stores, memory-accessing calls, etc.) that carry
``noalias`` metadata can specifically be specified not to alias with some other
collection of memory access instructions that carry ``alias.scope`` metadata.
Each type of metadata specifies a list of scopes where each scope has an id and
-a domain. When evaluating an aliasing query, if for some some domain, the set
+a domain. When evaluating an aliasing query, if for some domain, the set
of scopes with that domain in one instruction's ``alias.scope`` list is a
subset of (or equal to) the set of scopes for that domain in another
instruction's ``noalias`` list, then the two memory accesses are assumed not to
@@ -6577,7 +6584,7 @@ Arguments:
""""""""""
The '``ptrtoint``' instruction takes a ``value`` to cast, which must be
-a a value of type :ref:`pointer <t_pointer>` or a vector of pointers, and a
+a value of type :ref:`pointer <t_pointer>` or a vector of pointers, and a
type to cast it to ``ty2``, which must be an :ref:`integer <t_integer>` or
a vector of integers type.
diff --git a/docs/LibFuzzer.rst b/docs/LibFuzzer.rst
new file mode 100644
index 0000000..a31f83d
--- /dev/null
+++ b/docs/LibFuzzer.rst
@@ -0,0 +1,364 @@
+========================================================
+LibFuzzer -- a library for coverage-guided fuzz testing.
+========================================================
+.. contents::
+ :local:
+ :depth: 4
+
+Introduction
+============
+
+This library is intended primarily for in-process coverage-guided fuzz testing
+(fuzzing) of other libraries. The typical workflow looks like this:
+
+* Build the Fuzzer library as a static archive (or just a set of .o files).
+ Note that the Fuzzer contains the main() function.
+ Preferably do *not* use sanitizers while building the Fuzzer.
+* Build the library you are going to test with -fsanitize-coverage=[234]
+ and one of the sanitizers. We recommend building the library in several
+ different modes (e.g. asan, msan, lsan, ubsan, etc.) and even using different
+ optimization options (e.g. -O0, -O1, -O2) to diversify testing.
+* Build a test driver using the same options as the library.
+ The test driver is a C/C++ file containing interesting calls to the library
+ inside a single function ``extern "C" void TestOneInput(const uint8_t *Data, size_t Size);`` (a minimal sketch of such a driver follows this list).
+* Link the Fuzzer, the library and the driver together into an executable
+ using the same sanitizer options as for the library.
+* Collect the initial corpus of inputs for the
+ fuzzer (a directory with test inputs, one file per input).
+ The better your inputs are the faster you will find something interesting.
+ Also try to keep your inputs small, otherwise the Fuzzer will run too slow.
+* Run the fuzzer with the test corpus. As new interesting test cases are
+ discovered they will be added to the corpus. If a bug is discovered by
+ the sanitizer (asan, etc) it will be reported as usual and the reproducer
+ will be written to disk.
+ Each Fuzzer process is single-threaded (unless the library starts its own
+ threads). You can run the Fuzzer on the same corpus in multiple processes
+ in parallel. For run-time options run the Fuzzer binary with '-help=1'.
+
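+For illustration, a minimal driver might look like the sketch below (the
+``ParseConfig`` call is a hypothetical stand-in for whatever library entry
+point you actually want to exercise)::
+
+  #include <stddef.h>
+  #include <stdint.h>
+  #include <string>
+
+  // Hypothetical library function under test -- replace with a real API.
+  static void ParseConfig(const std::string &Text) { (void)Text; }
+
+  extern "C" void TestOneInput(const uint8_t *Data, size_t Size) {
+    // Hand the raw bytes to the library; keep this function free of global
+    // state so that individual runs stay independent.
+    ParseConfig(std::string(reinterpret_cast<const char *>(Data), Size));
+  }
+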
+
+The Fuzzer is similar in concept to AFL_,
+but uses in-process Fuzzing, which is more fragile, more restrictive, but
+potentially much faster as it has no overhead for process start-up.
+It uses LLVM's SanitizerCoverage_ instrumentation to get in-process
+coverage feedback.
+
+The code resides in the LLVM repository, requires a fresh Clang compiler to build
+and is used to fuzz various parts of LLVM,
+but the Fuzzer itself does not (and should not) depend on any
+part of LLVM and can be used for other projects w/o requiring the rest of LLVM.
+
+Usage examples
+==============
+
+Toy example
+-----------
+
+A simple function that does something interesting if it receives the input "HI!"::
+
+ cat << EOF >> test_fuzzer.cc
+ extern "C" void TestOneInput(const unsigned char *data, unsigned long size) {
+ if (size > 0 && data[0] == 'H')
+ if (size > 1 && data[1] == 'I')
+ if (size > 2 && data[2] == '!')
+ __builtin_trap();
+ }
+ EOF
+ # Get lib/Fuzzer. Assuming that you already have fresh clang in PATH.
+ svn co http://llvm.org/svn/llvm-project/llvm/trunk/lib/Fuzzer
+ # Build lib/Fuzzer files.
+ clang -c -g -O2 -std=c++11 Fuzzer/*.cpp -IFuzzer
+ # Build test_fuzzer.cc with asan and link against lib/Fuzzer.
+ clang++ -fsanitize=address -fsanitize-coverage=3 test_fuzzer.cc Fuzzer*.o
+ # Run the fuzzer with no corpus.
+ ./a.out
+
+You should get ``Illegal instruction (core dumped)`` pretty quickly.
+
+PCRE2
+-----
+
+Here we show how to use lib/Fuzzer on something real, yet simple: pcre2_::
+
+ COV_FLAGS=" -fsanitize-coverage=4 -mllvm -sanitizer-coverage-8bit-counters=1"
+ # Get PCRE2
+ svn co svn://vcs.exim.org/pcre2/code/trunk pcre
+ # Get lib/Fuzzer. Assuming that you already have fresh clang in PATH.
+ svn co http://llvm.org/svn/llvm-project/llvm/trunk/lib/Fuzzer
+ # Build PCRE2 with AddressSanitizer and coverage.
+ (cd pcre; ./autogen.sh; CC="clang -fsanitize=address $COV_FLAGS" ./configure --prefix=`pwd`/../inst && make -j && make install)
+ # Build lib/Fuzzer files.
+ clang -c -g -O2 -std=c++11 Fuzzer/*.cpp -IFuzzer
+ # Build the actual function that does something interesting with PCRE2.
+ cat << EOF > pcre_fuzzer.cc
+ #include <string.h>
+ #include "pcre2posix.h"
+ extern "C" void TestOneInput(const unsigned char *data, size_t size) {
+ if (size < 1) return;
+ char *str = new char[size+1];
+ memcpy(str, data, size);
+ str[size] = 0;
+ regex_t preg;
+ if (0 == regcomp(&preg, str, 0)) {
+ regexec(&preg, str, 0, 0, 0);
+ regfree(&preg);
+ }
+ delete [] str;
+ }
+ EOF
+ clang++ -g -fsanitize=address $COV_FLAGS -c -std=c++11 -I inst/include/ pcre_fuzzer.cc
+ # Link.
+ clang++ -g -fsanitize=address -Wl,--whole-archive inst/lib/*.a -Wl,-no-whole-archive Fuzzer*.o pcre_fuzzer.o -o pcre_fuzzer
+
+This will give you a binary of the fuzzer, called ``pcre_fuzzer``.
+Now, create a directory that will hold the test corpus::
+
+ mkdir -p CORPUS
+
+For simple input languages like regular expressions this is all you need.
+For more complicated inputs populate the directory with some input samples.
+Now run the fuzzer with the corpus dir as the only parameter::
+
+ ./pcre_fuzzer ./CORPUS
+
+You will see output like this::
+
+ Seed: 1876794929
+ #0 READ cov 0 bits 0 units 1 exec/s 0
+ #1 pulse cov 3 bits 0 units 1 exec/s 0
+ #1 INITED cov 3 bits 0 units 1 exec/s 0
+ #2 pulse cov 208 bits 0 units 1 exec/s 0
+ #2 NEW cov 208 bits 0 units 2 exec/s 0 L: 64
+ #3 NEW cov 217 bits 0 units 3 exec/s 0 L: 63
+ #4 pulse cov 217 bits 0 units 3 exec/s 0
+
+* The ``Seed:`` line shows you the current random seed (you can change it with the ``-seed=N`` flag).
+* The ``READ`` line shows you how many input files were read (since you passed an empty dir there were no inputs, but one dummy input was synthesised).
+* The ``INITED`` line shows you how many inputs will be fuzzed.
+* The ``NEW`` lines appear when the fuzzer finds a new interesting input, which is saved to the CORPUS dir. If multiple corpus dirs are given, the first one is used.
+* The ``pulse`` lines appear periodically to show the current status.
+
+Now, interrupt the fuzzer and run it again the same way. You will see::
+
+ Seed: 1879995378
+ #0 READ cov 0 bits 0 units 564 exec/s 0
+ #1 pulse cov 502 bits 0 units 564 exec/s 0
+ ...
+ #512 pulse cov 2933 bits 0 units 564 exec/s 512
+ #564 INITED cov 2991 bits 0 units 344 exec/s 564
+ #1024 pulse cov 2991 bits 0 units 344 exec/s 1024
+ #1455 NEW cov 2995 bits 0 units 345 exec/s 1455 L: 49
+
+This time you were running the fuzzer with a non-empty input corpus (564 items).
+As the first step, the fuzzer minimized the set to produce 344 interesting items (the ``INITED`` line).
+
+You may run ``N`` independent fuzzer jobs in parallel on ``M`` CPUs::
+
+ N=100; M=4; ./pcre_fuzzer ./CORPUS -jobs=$N -workers=$M
+
+This is useful when you already have an exhaustive test corpus.
+If you've just started fuzzing with no good corpus, running independent
+jobs will create a corpus with too many duplicates.
+One way to avoid this and still use all of your CPUs is to use the flag ``-exit_on_first=1``
+which will cause the fuzzer to exit on the first new synthesised input::
+
+ N=100; M=4; ./pcre_fuzzer ./CORPUS -jobs=$N -workers=$M -exit_on_first=1
+
+Heartbleed
+----------
+Remember Heartbleed_?
+As it was recently `shown <https://blog.hboeck.de/archives/868-How-Heartbleed-couldve-been-found.html>`_,
+fuzzing with AddressSanitizer can find Heartbleed. Indeed, here are the step-by-step instructions
+to find Heartbleed with LibFuzzer::
+
+ wget https://www.openssl.org/source/openssl-1.0.1f.tar.gz
+ tar xf openssl-1.0.1f.tar.gz
+ COV_FLAGS="-fsanitize-coverage=4" # -mllvm -sanitizer-coverage-8bit-counters=1"
+ (cd openssl-1.0.1f/ && ./config &&
+ make -j 32 CC="clang -g -fsanitize=address $COV_FLAGS")
+ # Get and build LibFuzzer
+ svn co http://llvm.org/svn/llvm-project/llvm/trunk/lib/Fuzzer
+ clang -c -g -O2 -std=c++11 Fuzzer/*.cpp -IFuzzer
+ # Get examples of key/pem files.
+ git clone https://github.com/hannob/selftls
+ cp selftls/server* . -v
+ cat << EOF > handshake-fuzz.cc
+ #include <openssl/ssl.h>
+ #include <openssl/err.h>
+ #include <assert.h>
+ SSL_CTX *sctx;
+ int Init() {
+ SSL_library_init();
+ SSL_load_error_strings();
+ ERR_load_BIO_strings();
+ OpenSSL_add_all_algorithms();
+ assert (sctx = SSL_CTX_new(TLSv1_method()));
+ assert (SSL_CTX_use_certificate_file(sctx, "server.pem", SSL_FILETYPE_PEM));
+ assert (SSL_CTX_use_PrivateKey_file(sctx, "server.key", SSL_FILETYPE_PEM));
+ return 0;
+ }
+ extern "C" void TestOneInput(unsigned char *Data, size_t Size) {
+ static int unused = Init();
+ SSL *server = SSL_new(sctx);
+ BIO *sinbio = BIO_new(BIO_s_mem());
+ BIO *soutbio = BIO_new(BIO_s_mem());
+ SSL_set_bio(server, sinbio, soutbio);
+ SSL_set_accept_state(server);
+ BIO_write(sinbio, Data, Size);
+ SSL_do_handshake(server);
+ SSL_free(server);
+ }
+ EOF
+ # Build the fuzzer.
+ clang++ -g handshake-fuzz.cc -fsanitize=address \
+ openssl-1.0.1f/libssl.a openssl-1.0.1f/libcrypto.a Fuzzer*.o
+ # Run 20 independent fuzzer jobs.
+ ./a.out -jobs=20 -workers=20
+
+Voila::
+
+ #1048576 pulse cov 3424 bits 0 units 9 exec/s 24385
+ =================================================================
+ ==17488==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x629000004748 at pc 0x00000048c979 bp 0x7fffe3e864f0 sp 0x7fffe3e85ca8
+ READ of size 60731 at 0x629000004748 thread T0
+ #0 0x48c978 in __asan_memcpy
+ #1 0x4db504 in tls1_process_heartbeat openssl-1.0.1f/ssl/t1_lib.c:2586:3
+ #2 0x580be3 in ssl3_read_bytes openssl-1.0.1f/ssl/s3_pkt.c:1092:4
+
+Advanced features
+=================
+
+Tokens
+------
+
+By default, the fuzzer is not aware of complexities of the input language
+and when fuzzing e.g. a C++ parser it will mostly stress the lexer.
+It is very hard for the fuzzer to come up with something like ``reinterpret_cast<int>``
+from a test corpus that doesn't have it.
+See a detailed discussion of this topic at
+http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html.
+
+lib/Fuzzer implements a simple technique that allows you to fuzz input languages with
+long tokens. All you need is to prepare a text file containing up to 253 tokens, one token per line,
+and pass it to the fuzzer as ``-tokens=TOKENS_FILE.txt``.
+Three implicit tokens are added: ``" "``, ``"\t"``, and ``"\n"``.
+The fuzzer itself will still be mutating a string of bytes, but before passing
+this input to the target library it will replace every byte ``b`` with the
+``b``-th token. If there are fewer than ``b`` tokens, a space will be added instead.
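+
+The replacement can be pictured with a small sketch (illustrative only, not
+the actual lib/Fuzzer code; ``BytesToTokens`` is a made-up name)::
+
+  #include <cstdint>
+  #include <string>
+  #include <vector>
+
+  // Map each mutated byte to a token: byte value b selects the b-th token,
+  // and out-of-range bytes fall back to a single space.
+  std::string BytesToTokens(const std::vector<uint8_t> &Bytes,
+                            const std::vector<std::string> &Tokens) {
+    std::string Out;
+    for (uint8_t B : Bytes)
+      Out += B < Tokens.size() ? Tokens[B] : std::string(" ");
+    return Out;
+  }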
+
+AFL compatibility
+-----------------
+LibFuzzer can be used in parallel with AFL_ on the same test corpus.
+Both fuzzers expect the test corpus to reside in a directory, one file per input.
+You can run both fuzzers on the same corpus in parallel::
+
+ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program -r @@
+ ./llvm-fuzz testcase_dir findings_dir # Will write new tests to testcase_dir
+
+Periodically restart both fuzzers so that they can use each other's findings.
+
+How good is my fuzzer?
+----------------------
+
+Once you implement your target function ``TestOneInput`` and fuzz it to death,
+you will want to know whether the function or the corpus can be improved further.
+One easy-to-use metric is, of course, code coverage.
+You can get the coverage for your corpus like this::
+
+ ASAN_OPTIONS=coverage_pcs=1 ./fuzzer CORPUS_DIR -runs=0
+
+This will run all the tests in CORPUS_DIR but will not generate any new tests,
+and will dump the covered PCs to disk before exiting.
+Then you can subtract the set of covered PCs from the set of all instrumented PCs in the binary;
+see SanitizerCoverage_ for details.
+
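+The subtraction itself is straightforward; a rough sketch, assuming both sets
+have been converted to plain text with one hex PC per line (the file names
+below are placeholders, not something the tools emit)::
+
+  #include <fstream>
+  #include <iostream>
+  #include <set>
+  #include <string>
+
+  int main() {
+    std::set<std::string> Covered;
+    std::string PC;
+    std::ifstream Cov("covered_pcs.txt"), All("all_pcs.txt");
+    while (Cov >> PC) Covered.insert(PC);
+    while (All >> PC)
+      if (!Covered.count(PC))
+        std::cout << PC << "\n";   // instrumented PCs the corpus never reached
+    return 0;
+  }
+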
+Fuzzing components of LLVM
+==========================
+
+clang-format-fuzzer
+-------------------
+The inputs are random pieces of C++-like text.
+
+Build (make sure to use fresh clang as the host compiler)::
+
+ cmake -GNinja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DLLVM_USE_SANITIZER=Address -DLLVM_USE_SANITIZE_COVERAGE=YES -DCMAKE_BUILD_TYPE=Release /path/to/llvm
+ ninja clang-format-fuzzer
+ mkdir CORPUS_DIR
+ ./bin/clang-format-fuzzer CORPUS_DIR
+
+Optionally build other kinds of binaries (asan+Debug, msan, ubsan, etc).
+
+TODO: commit the pre-fuzzed corpus to svn (?).
+
+Tracking bug: https://llvm.org/bugs/show_bug.cgi?id=23052
+
+clang-fuzzer
+------------
+
+The default behavior is very similar to ``clang-format-fuzzer``.
+Clang can also be fuzzed with Tokens_ using the ``-tokens=$LLVM/lib/Fuzzer/cxx_fuzzer_tokens.txt`` option.
+
+Tracking bug: https://llvm.org/bugs/show_bug.cgi?id=23057
+
+FAQ
+=========================
+
+Q. Why doesn't the Fuzzer use any of the LLVM support?
+-------------------------------------------------------
+
+There are two reasons.
+
+First, we want this library to be used outside of LLVM w/o users having to
+build the rest of LLVM. This may sound unconvincing for many LLVM folks,
+but in practice the need to build all of LLVM frightens many potential
+users -- and we want more users to use this code.
+
+Second, there is a subtle technical reason not to rely on the rest of LLVM, or
+any other large body of code (maybe not even STL). When coverage instrumentation
+is enabled, it will also instrument the LLVM support code, which will blow up the
+coverage set of the process (since the fuzzer is in-process). In other words, by
+using more external dependencies we will slow down the fuzzer while the main
+reason for it to exist is extreme speed.
+
+Q. What about Windows then? The Fuzzer contains code that does not build on Windows.
+------------------------------------------------------------------------------------
+
+The sanitizer coverage support does not work on Windows either as of 01/2015.
+Once it's there, we'll need to re-implement OS-specific parts (I/O, signals).
+
+Q. When is this Fuzzer not a good solution for a problem?
+---------------------------------------------------------
+
+* If the test inputs are validated by the target library and the validator
+ asserts/crashes on invalid inputs, the in-process fuzzer is not applicable
+ (we could use fork() w/o exec, but it comes with extra overhead).
+* Bugs in the target library may accumulate w/o being detected. E.g. a memory
+ corruption that goes undetected at first and then leads to a crash while
+ testing another input. This is why it is highly recommended to run this
+ in-process fuzzer with all sanitizers to detect most bugs on the spot.
+* It is harder to protect the in-process fuzzer from excessive memory
+ consumption and infinite loops in the target library (still possible).
+* The target library should not have significant global state that is not
+ reset between the runs.
+* Many interesting target libs are not designed in a way that supports
+ the in-process fuzzer interface (e.g. require a file path instead of a
+ byte array).
+* If a single test run takes a considerable fraction of a second (or
+ more) the speed benefit from the in-process fuzzer is negligible.
+* If the target library runs persistent threads (that outlive
+ execution of one test) the fuzzing results will be unreliable.
+
+Q. So, what exactly is this Fuzzer good for?
+--------------------------------------------
+
+This Fuzzer might be a good choice for testing libraries that have relatively
+small inputs, where each input takes < 1ms to run, and where the library code is
+not expected to crash on invalid inputs.
+Examples: regular expression matchers, text or binary format parsers.
+
+.. _pcre2: http://www.pcre.org/
+
+.. _AFL: http://lcamtuf.coredump.cx/afl/
+
+.. _SanitizerCoverage: https://code.google.com/p/address-sanitizer/wiki/AsanCoverage
+
+.. _Heartbleed: http://en.wikipedia.org/wiki/Heartbleed
diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst
index 3f4f72a..b0f62e0 100644
--- a/docs/Phabricator.rst
+++ b/docs/Phabricator.rst
@@ -64,7 +64,9 @@ To upload a new patch:
* Paste the text diff or upload the patch file.
Note that TODO
* Leave the drop down on *Create a new Revision...* and click *Continue*.
-* Enter a descriptive title and summary; add reviewers and mailing
+* Enter a descriptive title and summary. The title and summary are usually
+ in the form of a :ref:`commit message <commit messages>`.
+* Add reviewers and mailing
lists that you want to be included in the review. If your patch is
for LLVM, add llvm-commits as a subscriber; if your patch is for Clang,
add cfe-commits.
@@ -85,8 +87,11 @@ Reviewing code with Phabricator
Phabricator allows you to add inline comments as well as overall comments
to a revision. To add an inline comment, select the lines of code you want
to comment on by clicking and dragging the line numbers in the diff pane.
+When you have added all your comments, scroll to the bottom of the page and
+click the Submit button.
-You can add overall comments or submit your comments at the bottom of the page.
+You can add overall comments in the text box at the bottom of the page.
+When you're done, click the Submit button.
Phabricator has many useful features, for example allowing you to select
diffs between different versions of the patch as it was reviewed in the
@@ -128,6 +133,16 @@ This allows people reading the version history to see the review for
context. This also allows Phabricator to detect the commit, close the
review, and add a link from the review to the commit.
+Abandoning a change
+-------------------
+
+If you decide you should not commit the patch, you should explicitly abandon
+the review so that reviewers don't think it is still open. In the web UI,
+scroll to the bottom of the page where normally you would enter an overall
+comment. In the drop-down Action list, which defaults to "Comment," you should
+select "Abandon Revision" and then enter a comment explaining why. Click the
+Submit button to finish closing the review.
+
Status
------
diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst
index 2c7e4a9..6a4c22a 100644
--- a/docs/ProgrammersManual.rst
+++ b/docs/ProgrammersManual.rst
@@ -940,7 +940,7 @@ There are a variety of ways to pass around and use strings in C and C++, and
LLVM adds a few new options to choose from. Pick the first option on this list
that will do what you need, they are ordered according to their relative cost.
-Note that is is generally preferred to *not* pass strings around as ``const
+Note that it is generally preferred to *not* pass strings around as ``const
char*``'s. These have a number of problems, including the fact that they
cannot represent embedded nul ("\0") characters, and do not have a length
available efficiently. The general replacement for '``const char*``' is
@@ -1687,8 +1687,8 @@ they will automatically convert to a ptr-to-instance type whenever they need to.
Instead of derferencing the iterator and then taking the address of the result,
you can simply assign the iterator to the proper pointer type and you get the
dereference and address-of operation as a result of the assignment (behind the
-scenes, this is a result of overloading casting mechanisms). Thus the last line
-of the last example,
+scenes, this is a result of overloading casting mechanisms). Thus the second
+line of the last example,
.. code-block:: c++
@@ -2582,8 +2582,9 @@ doxygen info: `Type Clases <http://llvm.org/doxygen/classllvm_1_1Type.html>`_
The Core LLVM classes are the primary means of representing the program being
inspected or transformed. The core LLVM classes are defined in header files in
-the ``include/llvm/`` directory, and implemented in the ``lib/VMCore``
-directory.
+the ``include/llvm/IR`` directory, and implemented in the ``lib/IR``
+directory. It's worth noting that, for historical reasons, this library is
+called ``libLLVMCore.so``, not ``libLLVMIR.so`` as you might expect.
.. _Type:
@@ -2651,7 +2652,7 @@ Important Derived Types
Subclass of SequentialType for vector types. A vector type is similar to an
ArrayType but is distinguished because it is a first class type whereas
ArrayType is not. Vector types are used for vector operations and are usually
- small vectors of of an integer or floating point type.
+ small vectors of an integer or floating point type.
``StructType``
Subclass of DerivedTypes for struct types.
diff --git a/docs/R600Usage.rst b/docs/R600Usage.rst
index 48a30c8..093cdd7 100644
--- a/docs/R600Usage.rst
+++ b/docs/R600Usage.rst
@@ -6,22 +6,51 @@ Introduction
============
The R600 back-end provides ISA code generation for AMD GPUs, starting with
-the R600 family up until the current Sea Islands (GCN Gen 2).
+the R600 family up until the current Volcanic Islands (GCN Gen 3).
Assembler
=========
-The assembler is currently a work in progress and not yet complete. Below
-are the currently supported features.
+The assembler is currently considered experimental.
+
+For syntax examples look in test/MC/R600.
+
+Below are some of the currently supported features (modulo bugs). These
+all apply to the Southern Islands ISA; Sea Islands and Volcanic Islands
+are also supported, but may be missing some instructions and have more bugs:
+
+DS Instructions
+---------------
+All DS instructions are supported.
+
+MUBUF Instructions
+------------------
+All non-atomic MUBUF instructions are supported.
+
+SMRD Instructions
+-----------------
+Only the s_load_dword* SMRD instructions are supported.
+
+SOP1 Instructions
+-----------------
+All SOP1 instructions are supported.
+
+SOP2 Instructions
+-----------------
+All SOP2 instructions are supported.
+
+SOPC Instructions
+-----------------
+All SOPC instructions are supported.
SOPP Instructions
-----------------
-Unless otherwise mentioned, all SOPP instructions that with an operand
-accept a integer operand(s) only. No verification is performed on the
-operands, so it is up to the programmer to be familiar with the range
-or acceptable values.
+Unless otherwise mentioned, all SOPP instructions that have one or more
+operands accept integer operands only. No verification is performed
+on the operands, so it is up to the programmer to be familiar with the
+range of acceptable values.
s_waitcnt
^^^^^^^^^
@@ -41,3 +70,20 @@ wait for.
// Wait for vmcnt counter to be 1.
s_waitcnt vmcnt(1)
+VOP1, VOP2, VOP3, VOPC Instructions
+-----------------------------------
+
+All 32-bit and 64-bit encodings should work.
+
+The assembler will automatically detect which encoding size to use for
+VOP1, VOP2, and VOPC instructions based on the operands. If you want to force
+a specific encoding size, you can add an _e32 (for 32-bit encoding) or
+_e64 (for 64-bit encoding) suffix to the instruction. Most, but not all,
+instructions support an explicit suffix. These are all valid assembly
+strings:
+
+.. code-block:: nasm
+
+ v_mul_i32_i24 v1, v2, v3
+ v_mul_i32_i24_e32 v1, v2, v3
+ v_mul_i32_i24_e64 v1, v2, v3
diff --git a/docs/Vectorizers.rst b/docs/Vectorizers.rst
index 2b70217..65c19aa 100644
--- a/docs/Vectorizers.rst
+++ b/docs/Vectorizers.rst
@@ -366,7 +366,7 @@ The decision to unroll the loop depends on the register pressure and the generat
Performance
-----------
-This section shows the the execution time of Clang on a simple benchmark:
+This section shows the execution time of Clang on a simple benchmark:
`gcc-loops <http://llvm.org/viewvc/llvm-project/test-suite/trunk/SingleSource/UnitTests/Vectorizer/>`_.
This benchmark is a collection of loops from the GCC autovectorization
`page <http://gcc.gnu.org/projects/tree-ssa/vectorization.html>`_ by Dorit Nuzman.
diff --git a/docs/YamlIO.rst b/docs/YamlIO.rst
index 76dd021..3cc683b 100644
--- a/docs/YamlIO.rst
+++ b/docs/YamlIO.rst
@@ -332,7 +332,7 @@ as a field type:
}
};
-When reading YAML, if the string found does not match any of the the strings
+When reading YAML, if the string found does not match any of the strings
specified by enumCase() methods, an error is automatically generated.
When writing YAML, if the value being written does not match any of the values
specified by the enumCase() methods, a runtime assertion is triggered.
@@ -767,7 +767,7 @@ add "static const bool flow = true;". For instance:
};
With the above, if you used MyList as the data type in your native data
-structures, then then when converted to YAML, a flow sequence of integers
+structures, then when converted to YAML, a flow sequence of integers
will be used (e.g. [ 10, -3, 4 ]).
diff --git a/docs/index.rst b/docs/index.rst
index adb5419..2cc5b8b 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -177,6 +177,7 @@ For developers of applications which use LLVM as a library.
HowToSetUpLLVMStyleRTTI
ProgrammersManual
Extensions
+ LibFuzzer
:doc:`LLVM Language Reference Manual <LangRef>`
Defines the LLVM intermediate representation and the assembly form of the
@@ -218,6 +219,9 @@ For developers of applications which use LLVM as a library.
:doc:`CompilerWriterInfo`
A list of helpful links for compiler writers.
+:doc:`LibFuzzer`
+ A library for writing in-process guided fuzzers.
+
Subsystem Documentation
=======================
diff --git a/docs/tutorial/LangImpl5.rst b/docs/tutorial/LangImpl5.rst
index 72e34b1..ca2ffeb 100644
--- a/docs/tutorial/LangImpl5.rst
+++ b/docs/tutorial/LangImpl5.rst
@@ -254,7 +254,7 @@ In `Chapter 7 <LangImpl7.html>`_ of this tutorial ("mutable variables"),
we'll talk about #1 in depth. For now, just believe me that you don't
need SSA construction to handle this case. For #2, you have the choice
of using the techniques that we will describe for #1, or you can insert
-Phi nodes directly, if convenient. In this case, it is really really
+Phi nodes directly, if convenient. In this case, it is really
easy to generate the Phi node, so we choose to do it directly.
Okay, enough of the motivation and overview, lets generate code!
@@ -388,7 +388,7 @@ code:
The first two lines here are now familiar: the first adds the "merge"
block to the Function object (it was previously floating, like the else
-block above). The second block changes the insertion point so that newly
+block above). The second changes the insertion point so that newly
created code will go into the "merge" block. Once that is done, we need
to create the PHI node and set up the block/value pairs for the PHI.
diff --git a/docs/tutorial/LangImpl7.rst b/docs/tutorial/LangImpl7.rst
index c445908..6489407 100644
--- a/docs/tutorial/LangImpl7.rst
+++ b/docs/tutorial/LangImpl7.rst
@@ -632,7 +632,7 @@ own local variables, lets add this next!
User-defined Local Variables
============================
-Adding var/in is just like any other other extensions we made to
+Adding var/in is just like any other extension we made to
Kaleidoscope: we extend the lexer, the parser, the AST and the code
generator. The first step for adding our new 'var/in' construct is to
extend the lexer. As before, this is pretty trivial, the code looks like
diff --git a/docs/tutorial/OCamlLangImpl5.rst b/docs/tutorial/OCamlLangImpl5.rst
index b8ae3c5..0faecfb 100644
--- a/docs/tutorial/OCamlLangImpl5.rst
+++ b/docs/tutorial/OCamlLangImpl5.rst
@@ -336,7 +336,7 @@ for the 'then' block.
let phi = build_phi incoming "iftmp" builder in
The first two lines here are now familiar: the first adds the "merge"
-block to the Function object. The second block changes the insertion
+block to the Function object. The second changes the insertion
point so that newly created code will go into the "merge" block. Once
that is done, we need to create the PHI node and set up the block/value
pairs for the PHI.
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp
index f8129b8..81c48b9 100644
--- a/examples/BrainF/BrainF.cpp
+++ b/examples/BrainF/BrainF.cpp
@@ -163,7 +163,7 @@ void BrainF::header(LLVMContext& C) {
};
Constant *msgptr = ConstantExpr::
- getGetElementPtr(aberrormsg, gep_params);
+ getGetElementPtr(aberrormsg->getValueType(), aberrormsg, gep_params);
Value *puts_params[] = {
msgptr
diff --git a/examples/ExceptionDemo/CMakeLists.txt b/examples/ExceptionDemo/CMakeLists.txt
index 2a7667d..b4354f6 100644
--- a/examples/ExceptionDemo/CMakeLists.txt
+++ b/examples/ExceptionDemo/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
ExecutionEngine
MC
MCJIT
+ RuntimeDyld
Support
nativecodegen
)
@@ -11,6 +12,8 @@ set(LLVM_LINK_COMPONENTS
set(LLVM_REQUIRES_EH 1)
set(LLVM_REQUIRES_RTTI 1)
+set(LLVM_BUILD_EXAMPLES OFF)
+
add_llvm_example(ExceptionDemo
ExceptionDemo.cpp
)
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index d68c05f..fed42b7 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -1573,7 +1573,7 @@ public:
std::runtime_error::operator=(toCopy)));
}
- virtual ~OurCppRunException (void) throw () {}
+ ~OurCppRunException(void) throw() override {}
};
} // end anonymous namespace
diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp
index 04a1e1a..c60f767 100644
--- a/examples/Kaleidoscope/Chapter3/toy.cpp
+++ b/examples/Kaleidoscope/Chapter3/toy.cpp
@@ -93,7 +93,7 @@ class NumberExprAST : public ExprAST {
double Val;
public:
NumberExprAST(double val) : Val(val) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -101,7 +101,7 @@ class VariableExprAST : public ExprAST {
std::string Name;
public:
VariableExprAST(const std::string &name) : Name(name) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// BinaryExprAST - Expression class for a binary operator.
@@ -111,7 +111,7 @@ class BinaryExprAST : public ExprAST {
public:
BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
: Op(op), LHS(lhs), RHS(rhs) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// CallExprAST - Expression class for function calls.
@@ -121,7 +121,7 @@ class CallExprAST : public ExprAST {
public:
CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
: Callee(callee), Args(args) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// PrototypeAST - This class represents the "prototype" for a function,
diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp
index 329c3be..ad091e4 100644
--- a/examples/Kaleidoscope/Chapter4/toy.cpp
+++ b/examples/Kaleidoscope/Chapter4/toy.cpp
@@ -107,7 +107,7 @@ class NumberExprAST : public ExprAST {
public:
NumberExprAST(double val) : Val(val) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -116,7 +116,7 @@ class VariableExprAST : public ExprAST {
public:
VariableExprAST(const std::string &name) : Name(name) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// BinaryExprAST - Expression class for a binary operator.
@@ -127,7 +127,7 @@ class BinaryExprAST : public ExprAST {
public:
BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
: Op(op), LHS(lhs), RHS(rhs) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// CallExprAST - Expression class for function calls.
@@ -138,7 +138,7 @@ class CallExprAST : public ExprAST {
public:
CallExprAST(const std::string &callee, std::vector<ExprAST *> &args)
: Callee(callee), Args(args) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// PrototypeAST - This class represents the "prototype" for a function,
@@ -452,13 +452,13 @@ class HelpingMemoryManager : public SectionMemoryManager {
public:
HelpingMemoryManager(MCJITHelper *Helper) : MasterHelper(Helper) {}
- virtual ~HelpingMemoryManager() {}
+ ~HelpingMemoryManager() override {}
/// This method returns the address of the specified symbol.
/// Our implementation will attempt to find symbols in other
/// modules associated with the MCJITHelper to cross link symbols
/// from one generated module to another.
- virtual uint64_t getSymbolAddress(const std::string &Name) override;
+ uint64_t getSymbolAddress(const std::string &Name) override;
private:
MCJITHelper *MasterHelper;
diff --git a/examples/Kaleidoscope/Chapter5/CMakeLists.txt b/examples/Kaleidoscope/Chapter5/CMakeLists.txt
index aac9949..a938d97 100644
--- a/examples/Kaleidoscope/Chapter5/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter5/CMakeLists.txt
@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
ExecutionEngine
InstCombine
MCJIT
+ RuntimeDyld
ScalarOpts
Support
native
diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp
index 8ebc2bc..db99048 100644
--- a/examples/Kaleidoscope/Chapter5/toy.cpp
+++ b/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -125,7 +125,7 @@ class NumberExprAST : public ExprAST {
public:
NumberExprAST(double val) : Val(val) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -134,7 +134,7 @@ class VariableExprAST : public ExprAST {
public:
VariableExprAST(const std::string &name) : Name(name) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// BinaryExprAST - Expression class for a binary operator.
@@ -145,7 +145,7 @@ class BinaryExprAST : public ExprAST {
public:
BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
: Op(op), LHS(lhs), RHS(rhs) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// CallExprAST - Expression class for function calls.
@@ -156,7 +156,7 @@ class CallExprAST : public ExprAST {
public:
CallExprAST(const std::string &callee, std::vector<ExprAST *> &args)
: Callee(callee), Args(args) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// IfExprAST - Expression class for if/then/else.
@@ -166,7 +166,7 @@ class IfExprAST : public ExprAST {
public:
IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
: Cond(cond), Then(then), Else(_else) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// ForExprAST - Expression class for for/in.
@@ -178,7 +178,7 @@ public:
ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
ExprAST *step, ExprAST *body)
: VarName(varname), Start(start), End(end), Step(step), Body(body) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// PrototypeAST - This class represents the "prototype" for a function,
diff --git a/examples/Kaleidoscope/Chapter6/CMakeLists.txt b/examples/Kaleidoscope/Chapter6/CMakeLists.txt
index 55b9a78..7ac1ca4 100644
--- a/examples/Kaleidoscope/Chapter6/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter6/CMakeLists.txt
@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
ExecutionEngine
InstCombine
MCJIT
+ RuntimeDyld
ScalarOpts
Support
native
diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp
index eb7e8e1..e978a3e 100644
--- a/examples/Kaleidoscope/Chapter6/toy.cpp
+++ b/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -133,7 +133,7 @@ class NumberExprAST : public ExprAST {
public:
NumberExprAST(double val) : Val(val) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -142,7 +142,7 @@ class VariableExprAST : public ExprAST {
public:
VariableExprAST(const std::string &name) : Name(name) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// UnaryExprAST - Expression class for a unary operator.
@@ -153,7 +153,7 @@ class UnaryExprAST : public ExprAST {
public:
UnaryExprAST(char opcode, ExprAST *operand)
: Opcode(opcode), Operand(operand) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// BinaryExprAST - Expression class for a binary operator.
@@ -164,7 +164,7 @@ class BinaryExprAST : public ExprAST {
public:
BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
: Op(op), LHS(lhs), RHS(rhs) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// CallExprAST - Expression class for function calls.
@@ -175,7 +175,7 @@ class CallExprAST : public ExprAST {
public:
CallExprAST(const std::string &callee, std::vector<ExprAST *> &args)
: Callee(callee), Args(args) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// IfExprAST - Expression class for if/then/else.
@@ -185,7 +185,7 @@ class IfExprAST : public ExprAST {
public:
IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
: Cond(cond), Then(then), Else(_else) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// ForExprAST - Expression class for for/in.
@@ -197,7 +197,7 @@ public:
ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
ExprAST *step, ExprAST *body)
: VarName(varname), Start(start), End(end), Step(step), Body(body) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// PrototypeAST - This class represents the "prototype" for a function,
diff --git a/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/examples/Kaleidoscope/Chapter7/CMakeLists.txt
index 4bc1b2c..23aba65 100644
--- a/examples/Kaleidoscope/Chapter7/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter7/CMakeLists.txt
@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
ExecutionEngine
InstCombine
MCJIT
+ RuntimeDyld
ScalarOpts
Support
TransformUtils
@@ -12,6 +13,8 @@ set(LLVM_LINK_COMPONENTS
set(LLVM_REQUIRES_RTTI 1)
+set(LLVM_BUILD_EXAMPLES OFF)
+
add_kaleidoscope_chapter(Kaleidoscope-Ch7
toy.cpp
)
diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp
index ce5e1dd..53ea51c 100644
--- a/examples/Kaleidoscope/Chapter7/toy.cpp
+++ b/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -138,7 +138,7 @@ class NumberExprAST : public ExprAST {
public:
NumberExprAST(double val) : Val(val) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -148,7 +148,7 @@ class VariableExprAST : public ExprAST {
public:
VariableExprAST(const std::string &name) : Name(name) {}
const std::string &getName() const { return Name; }
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// UnaryExprAST - Expression class for a unary operator.
@@ -159,7 +159,7 @@ class UnaryExprAST : public ExprAST {
public:
UnaryExprAST(char opcode, ExprAST *operand)
: Opcode(opcode), Operand(operand) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// BinaryExprAST - Expression class for a binary operator.
@@ -170,7 +170,7 @@ class BinaryExprAST : public ExprAST {
public:
BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
: Op(op), LHS(lhs), RHS(rhs) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// CallExprAST - Expression class for function calls.
@@ -181,7 +181,7 @@ class CallExprAST : public ExprAST {
public:
CallExprAST(const std::string &callee, std::vector<ExprAST *> &args)
: Callee(callee), Args(args) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// IfExprAST - Expression class for if/then/else.
@@ -191,7 +191,7 @@ class IfExprAST : public ExprAST {
public:
IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
: Cond(cond), Then(then), Else(_else) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// ForExprAST - Expression class for for/in.
@@ -203,7 +203,7 @@ public:
ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
ExprAST *step, ExprAST *body)
: VarName(varname), Start(start), End(end), Step(step), Body(body) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// VarExprAST - Expression class for var/in
@@ -216,7 +216,7 @@ public:
ExprAST *body)
: VarNames(varnames), Body(body) {}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// PrototypeAST - This class represents the "prototype" for a function,
diff --git a/examples/Kaleidoscope/Chapter8/CMakeLists.txt b/examples/Kaleidoscope/Chapter8/CMakeLists.txt
index 42882e9..90c79c0 100644
--- a/examples/Kaleidoscope/Chapter8/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter8/CMakeLists.txt
@@ -2,12 +2,15 @@ set(LLVM_LINK_COMPONENTS
Core
ExecutionEngine
MCJIT
+ RuntimeDyld
Support
native
)
set(LLVM_REQUIRES_RTTI 1)
+set(LLVM_BUILD_EXAMPLES OFF)
+
add_kaleidoscope_chapter(Kaleidoscope-Ch8
toy.cpp
)
diff --git a/examples/Kaleidoscope/Chapter8/toy.cpp b/examples/Kaleidoscope/Chapter8/toy.cpp
index 39b6a65..c7e096c 100644
--- a/examples/Kaleidoscope/Chapter8/toy.cpp
+++ b/examples/Kaleidoscope/Chapter8/toy.cpp
@@ -221,10 +221,10 @@ class NumberExprAST : public ExprAST {
public:
NumberExprAST(double val) : Val(val) {}
- virtual std::ostream &dump(std::ostream &out, int ind) {
+ std::ostream &dump(std::ostream &out, int ind) override {
return ExprAST::dump(out << Val, ind);
}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -235,10 +235,10 @@ public:
VariableExprAST(SourceLocation Loc, const std::string &name)
: ExprAST(Loc), Name(name) {}
const std::string &getName() const { return Name; }
- virtual std::ostream &dump(std::ostream &out, int ind) {
+ std::ostream &dump(std::ostream &out, int ind) override {
return ExprAST::dump(out << Name, ind);
}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// UnaryExprAST - Expression class for a unary operator.
@@ -249,12 +249,12 @@ class UnaryExprAST : public ExprAST {
public:
UnaryExprAST(char opcode, ExprAST *operand)
: Opcode(opcode), Operand(operand) {}
- virtual std::ostream &dump(std::ostream &out, int ind) {
+ std::ostream &dump(std::ostream &out, int ind) override {
ExprAST::dump(out << "unary" << Opcode, ind);
Operand->dump(out, ind + 1);
return out;
}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// BinaryExprAST - Expression class for a binary operator.
@@ -265,13 +265,13 @@ class BinaryExprAST : public ExprAST {
public:
BinaryExprAST(SourceLocation Loc, char op, ExprAST *lhs, ExprAST *rhs)
: ExprAST(Loc), Op(op), LHS(lhs), RHS(rhs) {}
- virtual std::ostream &dump(std::ostream &out, int ind) {
+ std::ostream &dump(std::ostream &out, int ind) override {
ExprAST::dump(out << "binary" << Op, ind);
LHS->dump(indent(out, ind) << "LHS:", ind + 1);
RHS->dump(indent(out, ind) << "RHS:", ind + 1);
return out;
}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// CallExprAST - Expression class for function calls.
@@ -283,13 +283,13 @@ public:
CallExprAST(SourceLocation Loc, const std::string &callee,
std::vector<ExprAST *> &args)
: ExprAST(Loc), Callee(callee), Args(args) {}
- virtual std::ostream &dump(std::ostream &out, int ind) {
+ std::ostream &dump(std::ostream &out, int ind) override {
ExprAST::dump(out << "call " << Callee, ind);
for (ExprAST *Arg : Args)
Arg->dump(indent(out, ind + 1), ind + 1);
return out;
}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// IfExprAST - Expression class for if/then/else.
@@ -299,14 +299,14 @@ class IfExprAST : public ExprAST {
public:
IfExprAST(SourceLocation Loc, ExprAST *cond, ExprAST *then, ExprAST *_else)
: ExprAST(Loc), Cond(cond), Then(then), Else(_else) {}
- virtual std::ostream &dump(std::ostream &out, int ind) {
+ std::ostream &dump(std::ostream &out, int ind) override {
ExprAST::dump(out << "if", ind);
Cond->dump(indent(out, ind) << "Cond:", ind + 1);
Then->dump(indent(out, ind) << "Then:", ind + 1);
Else->dump(indent(out, ind) << "Else:", ind + 1);
return out;
}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// ForExprAST - Expression class for for/in.
@@ -318,7 +318,7 @@ public:
ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
ExprAST *step, ExprAST *body)
: VarName(varname), Start(start), End(end), Step(step), Body(body) {}
- virtual std::ostream &dump(std::ostream &out, int ind) {
+ std::ostream &dump(std::ostream &out, int ind) override {
ExprAST::dump(out << "for", ind);
Start->dump(indent(out, ind) << "Cond:", ind + 1);
End->dump(indent(out, ind) << "End:", ind + 1);
@@ -326,7 +326,7 @@ public:
Body->dump(indent(out, ind) << "Body:", ind + 1);
return out;
}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// VarExprAST - Expression class for var/in
@@ -339,14 +339,14 @@ public:
ExprAST *body)
: VarNames(varnames), Body(body) {}
- virtual std::ostream &dump(std::ostream &out, int ind) {
+ std::ostream &dump(std::ostream &out, int ind) override {
ExprAST::dump(out << "var", ind);
for (const auto &NamedVar : VarNames)
NamedVar.second->dump(indent(out, ind) << NamedVar.first << ':', ind + 1);
Body->dump(indent(out, ind) << "Body:", ind + 1);
return out;
}
- virtual Value *Codegen();
+ Value *Codegen() override;
};
/// PrototypeAST - This class represents the "prototype" for a function,
@@ -817,7 +817,7 @@ static PrototypeAST *ParseExtern() {
static DIBuilder *DBuilder;
DIType DebugInfo::getDoubleTy() {
- if (DblTy.isValid())
+ if (DblTy)
return DblTy;
DblTy = DBuilder->createBasicType("double", 64, 64, dwarf::DW_ATE_float);
@@ -827,16 +827,16 @@ DIType DebugInfo::getDoubleTy() {
void DebugInfo::emitLocation(ExprAST *AST) {
if (!AST)
return Builder.SetCurrentDebugLocation(DebugLoc());
- DIScope *Scope;
+ MDScope *Scope;
if (LexicalBlocks.empty())
- Scope = &TheCU;
+ Scope = TheCU;
else
- Scope = LexicalBlocks.back();
+ Scope = *LexicalBlocks.back();
Builder.SetCurrentDebugLocation(
- DebugLoc::get(AST->getLine(), AST->getCol(), DIScope(*Scope)));
+ DebugLoc::get(AST->getLine(), AST->getCol(), Scope));
}
-static DICompositeType CreateFunctionType(unsigned NumArgs, DIFile Unit) {
+static MDSubroutineType *CreateFunctionType(unsigned NumArgs, DIFile Unit) {
SmallVector<Metadata *, 8> EltTys;
DIType DblTy = KSDbgInfo.getDoubleTy();
@@ -1224,15 +1224,15 @@ Function *PrototypeAST::Codegen() {
AI->setName(Args[Idx]);
// Create a subprogram DIE for this function.
- DIFile Unit = DBuilder->createFile(KSDbgInfo.TheCU.getFilename(),
- KSDbgInfo.TheCU.getDirectory());
- DIDescriptor FContext(Unit);
+ DIFile Unit = DBuilder->createFile(KSDbgInfo.TheCU->getFilename(),
+ KSDbgInfo.TheCU->getDirectory());
+ MDScope *FContext = Unit;
unsigned LineNo = Line;
unsigned ScopeLine = Line;
DISubprogram SP = DBuilder->createFunction(
FContext, Name, StringRef(), Unit, LineNo,
CreateFunctionType(Args.size(), Unit), false /* internal linkage */,
- true /* definition */, ScopeLine, DIDescriptor::FlagPrototyped, false, F);
+ true /* definition */, ScopeLine, DebugNode::FlagPrototyped, false, F);
KSDbgInfo.FnScopeMap[this] = SP;
return F;
@@ -1248,15 +1248,15 @@ void PrototypeAST::CreateArgumentAllocas(Function *F) {
// Create a debug descriptor for the variable.
DIScope *Scope = KSDbgInfo.LexicalBlocks.back();
- DIFile Unit = DBuilder->createFile(KSDbgInfo.TheCU.getFilename(),
- KSDbgInfo.TheCU.getDirectory());
+ DIFile Unit = DBuilder->createFile(KSDbgInfo.TheCU->getFilename(),
+ KSDbgInfo.TheCU->getDirectory());
DIVariable D = DBuilder->createLocalVariable(dwarf::DW_TAG_arg_variable,
*Scope, Args[Idx], Unit, Line,
KSDbgInfo.getDoubleTy(), Idx);
- Instruction *Call = DBuilder->insertDeclare(
- Alloca, D, DBuilder->createExpression(), Builder.GetInsertBlock());
- Call->setDebugLoc(DebugLoc::get(Line, 0, *Scope));
+ DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
+ DebugLoc::get(Line, 0, *Scope),
+ Builder.GetInsertBlock());
// Store the initial value into the alloca.
Builder.CreateStore(AI, Alloca);
diff --git a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp
index 3b5dcd0..b582292 100644
--- a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp
+++ b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp
@@ -1,6 +1,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
@@ -1168,11 +1169,9 @@ public:
KaleidoscopeJIT(SessionContext &Session)
: Session(Session),
Mang(Session.getTarget().getDataLayout()),
- ObjectLayer(
- [](){ return llvm::make_unique<SectionMemoryManager>(); }),
CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())),
LazyEmitLayer(CompileLayer),
- CompileCallbacks(LazyEmitLayer, Session.getLLVMContext(),
+ CompileCallbacks(LazyEmitLayer, CCMgrMemMgr, Session.getLLVMContext(),
reinterpret_cast<uintptr_t>(EarthShatteringKaboom),
64) {}
@@ -1194,20 +1193,22 @@ public:
// We need a memory manager to allocate memory and resolve symbols for this
// new module. Create one that resolves symbols by looking back into the
// JIT.
- auto MM = createLookasideRTDyldMM<SectionMemoryManager>(
- [&](const std::string &Name) {
- // First try to find 'Name' within the JIT.
- if (auto Symbol = findSymbol(Name))
- return Symbol.getAddress();
-
- // If we don't already have a definition of 'Name' then search
- // the ASTs.
- return searchFunctionASTs(Name);
- },
- [](const std::string &S) { return 0; } );
+ auto Resolver = createLambdaResolver(
+ [&](const std::string &Name) {
+ // First try to find 'Name' within the JIT.
+ if (auto Symbol = findSymbol(Name))
+ return RuntimeDyld::SymbolInfo(Symbol.getAddress(),
+ Symbol.getFlags());
+
+ // If we don't already have a definition of 'Name' then search
+ // the ASTs.
+ return searchFunctionASTs(Name);
+ },
+ [](const std::string &S) { return nullptr; } );
return LazyEmitLayer.addModuleSet(singletonSet(std::move(M)),
- std::move(MM));
+ make_unique<SectionMemoryManager>(),
+ std::move(Resolver));
}
void removeModule(ModuleHandleT H) { LazyEmitLayer.removeModuleSet(H); }
@@ -1232,7 +1233,7 @@ private:
// This method searches the FunctionDefs map for a definition of 'Name'. If it
// finds one it generates a stub for it and returns the address of the stub.
- TargetAddress searchFunctionASTs(const std::string &Name) {
+ RuntimeDyld::SymbolInfo searchFunctionASTs(const std::string &Name) {
auto DefI = FunctionDefs.find(Name);
if (DefI == FunctionDefs.end())
return 0;
@@ -1244,7 +1245,8 @@ private:
// IRGen the AST, add it to the JIT, and return the address for it.
auto H = irGenStub(std::move(FnAST));
- return findSymbolIn(H, Name).getAddress();
+ auto Sym = findSymbolIn(H, Name);
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
}
// This method will take the AST for a function definition and IR-gen a stub
@@ -1261,14 +1263,16 @@ private:
// compile and update actions for the callback, and get a pointer to
// the jit trampoline that we need to call to trigger those actions.
auto CallbackInfo =
- CompileCallbacks.getCompileCallback(*F->getFunctionType());
+ CompileCallbacks.getCompileCallback(F->getContext());
// Step 3) Create a stub that will indirectly call the body of this
// function once it is compiled. Initially, set the function
// pointer for the indirection to point at the trampoline.
std::string BodyPtrName = (F->getName() + "$address").str();
GlobalVariable *FunctionBodyPointer =
- createImplPointer(*F, BodyPtrName, CallbackInfo.getAddress());
+ createImplPointer(*F->getType(), *F->getParent(), BodyPtrName,
+ createIRTypedAddress(*F->getFunctionType(),
+ CallbackInfo.getAddress()));
makeStub(*F, *FunctionBodyPointer);
// Step 4) Add the module containing the stub to the JIT.
@@ -1297,6 +1301,7 @@ private:
SessionContext &Session;
Mangler Mang;
+ SectionMemoryManager CCMgrMemMgr;
ObjLayerT ObjectLayer;
CompileLayerT CompileLayer;
LazyEmitLayerT LazyEmitLayer;
diff --git a/examples/Kaleidoscope/Orc/initial/toy.cpp b/examples/Kaleidoscope/Orc/initial/toy.cpp
index cc6fb8e..bf43f29 100644
--- a/examples/Kaleidoscope/Orc/initial/toy.cpp
+++ b/examples/Kaleidoscope/Orc/initial/toy.cpp
@@ -1,6 +1,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/IR/DataLayout.h"
@@ -1175,13 +1176,18 @@ public:
// We need a memory manager to allocate memory and resolve symbols for this
// new module. Create one that resolves symbols by looking back into the
// JIT.
- auto MM = createLookasideRTDyldMM<SectionMemoryManager>(
- [&](const std::string &Name) {
- return findSymbol(Name).getAddress();
- },
- [](const std::string &S) { return 0; } );
-
- return CompileLayer.addModuleSet(singletonSet(std::move(M)), std::move(MM));
+ auto Resolver = createLambdaResolver(
+ [&](const std::string &Name) {
+ if (auto Sym = findSymbol(Name))
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(),
+ Sym.getFlags());
+ return RuntimeDyld::SymbolInfo(nullptr);
+ },
+ [](const std::string &S) { return nullptr; }
+ );
+ return CompileLayer.addModuleSet(singletonSet(std::move(M)),
+ make_unique<SectionMemoryManager>(),
+ std::move(Resolver));
}
void removeModule(ModuleHandleT H) { CompileLayer.removeModuleSet(H); }
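All four Orc demos move from createLookasideRTDyldMM to the LambdaResolver header added in this release: symbol lookups now return RuntimeDyld::SymbolInfo (address plus flags) rather than a bare address, and addModuleSet takes the memory manager and the symbol resolver as separate arguments, so resolution is no longer tied to a particular memory-manager type. A condensed sketch of the new pattern, mirroring the patched examples; findSymbol, singletonSet, make_unique and the layer members are the ones defined in these toy JITs:

    auto Resolver = createLambdaResolver(
        // Lookup within the JIT itself.
        [&](const std::string &Name) {
          if (auto Sym = findSymbol(Name))
            return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
          return RuntimeDyld::SymbolInfo(nullptr);
        },
        // External lookup; these examples resolve nothing outside the JIT.
        [](const std::string &Name) { return nullptr; });

    CompileLayer.addModuleSet(singletonSet(std::move(M)),
                              make_unique<SectionMemoryManager>(),
                              std::move(Resolver));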
diff --git a/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp b/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp
index 6e2ec27..1369ba6 100644
--- a/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp
+++ b/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp
@@ -1,6 +1,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/IR/DataLayout.h"
@@ -1178,14 +1179,18 @@ public:
// We need a memory manager to allocate memory and resolve symbols for this
// new module. Create one that resolves symbols by looking back into the
// JIT.
- auto MM = createLookasideRTDyldMM<SectionMemoryManager>(
- [&](const std::string &Name) {
- return findSymbol(Name).getAddress();
- },
- [](const std::string &S) { return 0; } );
+ auto Resolver = createLambdaResolver(
+ [&](const std::string &Name) {
+ if (auto Sym = findSymbol(Name))
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(),
+ Sym.getFlags());
+ return RuntimeDyld::SymbolInfo(nullptr);
+ },
+ [](const std::string &S) { return nullptr; } );
return LazyEmitLayer.addModuleSet(singletonSet(std::move(M)),
- std::move(MM));
+ make_unique<SectionMemoryManager>(),
+ std::move(Resolver));
}
void removeModule(ModuleHandleT H) { LazyEmitLayer.removeModuleSet(H); }
diff --git a/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp b/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp
index 19801e1..c489a45 100644
--- a/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp
+++ b/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp
@@ -1,6 +1,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/IR/DataLayout.h"
@@ -1183,20 +1184,22 @@ public:
// We need a memory manager to allocate memory and resolve symbols for this
// new module. Create one that resolves symbols by looking back into the
// JIT.
- auto MM = createLookasideRTDyldMM<SectionMemoryManager>(
- [&](const std::string &Name) {
- // First try to find 'Name' within the JIT.
- if (auto Symbol = findSymbol(Name))
- return Symbol.getAddress();
-
- // If we don't already have a definition of 'Name' then search
- // the ASTs.
- return searchFunctionASTs(Name);
- },
- [](const std::string &S) { return 0; } );
+ auto Resolver = createLambdaResolver(
+ [&](const std::string &Name) {
+ // First try to find 'Name' within the JIT.
+ if (auto Symbol = findSymbol(Name))
+ return RuntimeDyld::SymbolInfo(Symbol.getAddress(),
+ Symbol.getFlags());
+
+ // If we don't already have a definition of 'Name' then search
+ // the ASTs.
+ return searchFunctionASTs(Name);
+ },
+ [](const std::string &S) { return nullptr; } );
return LazyEmitLayer.addModuleSet(singletonSet(std::move(M)),
- std::move(MM));
+ make_unique<SectionMemoryManager>(),
+ std::move(Resolver));
}
void removeModule(ModuleHandleT H) { LazyEmitLayer.removeModuleSet(H); }
@@ -1217,7 +1220,7 @@ private:
// This method searches the FunctionDefs map for a definition of 'Name'. If it
// finds one it generates a stub for it and returns the address of the stub.
- TargetAddress searchFunctionASTs(const std::string &Name) {
+ RuntimeDyld::SymbolInfo searchFunctionASTs(const std::string &Name) {
auto DefI = FunctionDefs.find(Name);
if (DefI == FunctionDefs.end())
return 0;
@@ -1228,7 +1231,8 @@ private:
// IRGen the AST, add it to the JIT, and return the address for it.
auto H = addModule(IRGen(Session, *FnAST));
- return findSymbolIn(H, Name).getAddress();
+ auto Sym = findSymbolIn(H, Name);
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
}
SessionContext &Session;
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 8873fdb..effbd15 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -169,6 +169,7 @@ typedef enum {
LLVMNonNullAttribute = 1ULL << 37,
LLVMJumpTableAttribute = 1ULL << 38,
LLVMDereferenceableAttribute = 1ULL << 39,
+ LLVMDereferenceableOrNullAttribute = 1ULL << 40,
*/
} LLVMAttribute;
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index 8c65f70..9c2365a 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -100,16 +100,18 @@ public:
}
const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey();
+ unsigned NumEntries = getNumEntries();
for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) {
if (!KeyInfoT::isEqual(P->getFirst(), EmptyKey)) {
if (!KeyInfoT::isEqual(P->getFirst(), TombstoneKey)) {
P->getSecond().~ValueT();
- decrementNumEntries();
+ --NumEntries;
}
P->getFirst() = EmptyKey;
}
}
- assert(getNumEntries() == 0 && "Node count imbalance!");
+ assert(NumEntries == 0 && "Node count imbalance!");
+ setNumEntries(0);
setNumTombstones(0);
}
@@ -257,7 +259,7 @@ public:
const void *getPointerIntoBucketsArray() const { return getBuckets(); }
protected:
- DenseMapBase() {}
+ DenseMapBase() = default;
void destroyAll() {
if (getNumBuckets() == 0) // Nothing to do.
@@ -341,11 +343,6 @@ protected:
}
}
- void swap(DenseMapBase& RHS) {
- std::swap(getNumEntries(), RHS.getNumEntries());
- std::swap(getNumTombstones(), RHS.getNumTombstones());
- }
-
static unsigned getHashValue(const KeyT &Val) {
return KeyInfoT::getHashValue(Val);
}
@@ -589,6 +586,8 @@ public:
}
void swap(DenseMap& RHS) {
+ this->incrementEpoch();
+ RHS.incrementEpoch();
std::swap(Buckets, RHS.Buckets);
std::swap(NumEntries, RHS.NumEntries);
std::swap(NumTombstones, RHS.NumTombstones);
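The DenseMap changes are defensive: clear() now updates the entry count once instead of per bucket, the redundant DenseMapBase::swap helper is removed, and DenseMap::swap bumps the debug epoch on both maps so that, in builds with the epoch tracker enabled, stale iterators are caught rather than silently reused. A small usage sketch of what the epoch bump protects against; it assumes ABI-breaking checks are enabled for the handle check to fire:

    #include "llvm/ADT/DenseMap.h"
    using namespace llvm;

    void swapInvalidation() {
      DenseMap<int, int> A, B;
      A[1] = 10;
      auto It = A.begin();   // handle tied to A's current epoch
      A.swap(B);             // both maps increment their epoch here
      // 'It' must not be dereferenced or advanced after the swap; with the
      // epoch tracker enabled this misuse is now detectable rather than UB only.
      (void)It;
    }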
diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h
index 5186333..e9668c4 100644
--- a/include/llvm/ADT/DepthFirstIterator.h
+++ b/include/llvm/ADT/DepthFirstIterator.h
@@ -209,7 +209,7 @@ df_iterator<T> df_end(const T& G) {
// Provide an accessor method to use them in range-based patterns.
template <class T>
iterator_range<df_iterator<T>> depth_first(const T& G) {
- return iterator_range<df_iterator<T>>(df_begin(G), df_end(G));
+ return make_range(df_begin(G), df_end(G));
}
// Provide global definitions of external depth first iterators...
@@ -232,8 +232,7 @@ df_ext_iterator<T, SetTy> df_ext_end(const T& G, SetTy &S) {
template <class T, class SetTy>
iterator_range<df_ext_iterator<T, SetTy>> depth_first_ext(const T& G,
SetTy &S) {
- return iterator_range<df_ext_iterator<T, SetTy>>(df_ext_begin(G, S),
- df_ext_end(G, S));
+ return make_range(df_ext_begin(G, S), df_ext_end(G, S));
}
@@ -259,7 +258,7 @@ idf_iterator<T> idf_end(const T& G){
// Provide an accessor method to use them in range-based patterns.
template <class T>
iterator_range<idf_iterator<T>> inverse_depth_first(const T& G) {
- return iterator_range<idf_iterator<T>>(idf_begin(G), idf_end(G));
+ return make_range(idf_begin(G), idf_end(G));
}
// Provide global definitions of external inverse depth first iterators...
@@ -284,8 +283,7 @@ idf_ext_iterator<T, SetTy> idf_ext_end(const T& G, SetTy &S) {
template <class T, class SetTy>
iterator_range<idf_ext_iterator<T, SetTy>> inverse_depth_first_ext(const T& G,
SetTy &S) {
- return iterator_range<idf_ext_iterator<T, SetTy>>(idf_ext_begin(G, S),
- idf_ext_end(G, S));
+ return make_range(idf_ext_begin(G, S), idf_ext_end(G, S));
}
} // End llvm namespace
diff --git a/include/llvm/ADT/EpochTracker.h b/include/llvm/ADT/EpochTracker.h
index d593073..582d581 100644
--- a/include/llvm/ADT/EpochTracker.h
+++ b/include/llvm/ADT/EpochTracker.h
@@ -30,7 +30,7 @@ public:
class HandleBase {
public:
- HandleBase() {}
+ HandleBase() = default;
explicit HandleBase(const DebugEpochBase *) {}
bool isHandleInSync() const { return true; }
const void *getEpochAddress() const { return nullptr; }
diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h
index a1e7864..77e6d77 100644
--- a/include/llvm/ADT/Hashing.h
+++ b/include/llvm/ADT/Hashing.h
@@ -75,7 +75,7 @@ class hash_code {
public:
/// \brief Default construct a hash_code.
/// Note that this leaves the value uninitialized.
- hash_code() {}
+ hash_code() = default;
/// \brief Form a hash code directly from a numerical value.
hash_code(size_t value) : value(value) {}
diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h
index fa337e9..759a2db 100644
--- a/include/llvm/ADT/PostOrderIterator.h
+++ b/include/llvm/ADT/PostOrderIterator.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/iterator_range.h"
#include <set>
#include <vector>
@@ -174,9 +175,13 @@ public:
// Provide global constructors that automatically figure out correct types...
//
template <class T>
-po_iterator<T> po_begin(T G) { return po_iterator<T>::begin(G); }
+po_iterator<T> po_begin(const T &G) { return po_iterator<T>::begin(G); }
template <class T>
-po_iterator<T> po_end (T G) { return po_iterator<T>::end(G); }
+po_iterator<T> po_end (const T &G) { return po_iterator<T>::end(G); }
+
+template <class T> iterator_range<po_iterator<T>> post_order(const T &G) {
+ return make_range(po_begin(G), po_end(G));
+}
// Provide global definitions of external postorder iterators...
template<class T, class SetType=std::set<typename GraphTraits<T>::NodeType*> >
@@ -195,6 +200,11 @@ po_ext_iterator<T, SetType> po_ext_end(T G, SetType &S) {
return po_ext_iterator<T, SetType>::end(G, S);
}
+template <class T, class SetType>
+iterator_range<po_ext_iterator<T, SetType>> post_order_ext(const T &G, SetType &S) {
+ return make_range(po_ext_begin(G, S), po_ext_end(G, S));
+}
+
// Provide global definitions of inverse post order iterators...
template <class T,
class SetType = std::set<typename GraphTraits<T>::NodeType*>,
@@ -205,15 +215,20 @@ struct ipo_iterator : public po_iterator<Inverse<T>, SetType, External > {
};
template <class T>
-ipo_iterator<T> ipo_begin(T G, bool Reverse = false) {
+ipo_iterator<T> ipo_begin(const T &G, bool Reverse = false) {
return ipo_iterator<T>::begin(G, Reverse);
}
template <class T>
-ipo_iterator<T> ipo_end(T G){
+ipo_iterator<T> ipo_end(const T &G){
return ipo_iterator<T>::end(G);
}
+template <class T>
+iterator_range<ipo_iterator<T>> inverse_post_order(const T &G, bool Reverse = false) {
+ return make_range(ipo_begin(G, Reverse), ipo_end(G));
+}
+
// Provide global definitions of external inverse postorder iterators...
template <class T,
class SetType = std::set<typename GraphTraits<T>::NodeType*> >
@@ -225,15 +240,21 @@ struct ipo_ext_iterator : public ipo_iterator<T, SetType, true> {
};
template <class T, class SetType>
-ipo_ext_iterator<T, SetType> ipo_ext_begin(T G, SetType &S) {
+ipo_ext_iterator<T, SetType> ipo_ext_begin(const T &G, SetType &S) {
return ipo_ext_iterator<T, SetType>::begin(G, S);
}
template <class T, class SetType>
-ipo_ext_iterator<T, SetType> ipo_ext_end(T G, SetType &S) {
+ipo_ext_iterator<T, SetType> ipo_ext_end(const T &G, SetType &S) {
return ipo_ext_iterator<T, SetType>::end(G, S);
}
+template <class T, class SetType>
+iterator_range<ipo_ext_iterator<T, SetType>>
+inverse_post_order_ext(const T &G, SetType &S) {
+ return make_range(ipo_ext_begin(G, S), ipo_ext_end(G, S));
+}
+
//===--------------------------------------------------------------------===//
// Reverse Post Order CFG iterator code
//===--------------------------------------------------------------------===//
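Besides constifying the graph parameters, this change adds range adaptors (post_order, post_order_ext, inverse_post_order, inverse_post_order_ext) so post-order walks can use range-based for loops; the LoopInfoImpl.h and RegionInfoImpl.h hunks further down switch to them. A short sketch over an IR CFG, where Function and the CFG GraphTraits come from the usual IR headers:

    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Visit every reachable basic block of F in post order.
    void walkPostOrder(Function &F) {
      for (BasicBlock *BB : post_order(&F.getEntryBlock())) {
        (void)BB; // process BB here
      }
    }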
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index 51bb18d..fbe5b65 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -430,7 +430,7 @@ namespace llvm {
/// Return the twine contents as a std::string.
std::string str() const;
- /// Write the concatenated string into the given SmallString or SmallVector.
+ /// Append the concatenated string into the given SmallString or SmallVector.
void toVector(SmallVectorImpl<char> &Out) const;
/// This returns the twine as a single StringRef. This method is only valid
diff --git a/include/llvm/ADT/iterator.h b/include/llvm/ADT/iterator.h
index e2c9e5e..54a288d 100644
--- a/include/llvm/ADT/iterator.h
+++ b/include/llvm/ADT/iterator.h
@@ -150,7 +150,7 @@ class iterator_adaptor_base
protected:
WrappedIteratorT I;
- iterator_adaptor_base() {}
+ iterator_adaptor_base() = default;
template <typename U>
explicit iterator_adaptor_base(
@@ -231,7 +231,7 @@ struct pointee_iterator
pointee_iterator<WrappedIteratorT>, WrappedIteratorT,
typename std::iterator_traits<WrappedIteratorT>::iterator_category,
T> {
- pointee_iterator() {}
+ pointee_iterator() = default;
template <typename U>
pointee_iterator(U &&u)
: pointee_iterator::iterator_adaptor_base(std::forward<U &&>(u)) {}
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index 43bcc34..6999bd1 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -145,6 +145,19 @@ public:
Location getLocation(const AtomicRMWInst *RMWI);
static Location getLocationForSource(const MemTransferInst *MTI);
static Location getLocationForDest(const MemIntrinsic *MI);
+ Location getLocation(const Instruction *Inst) {
+ if (auto *I = dyn_cast<LoadInst>(Inst))
+ return getLocation(I);
+ else if (auto *I = dyn_cast<StoreInst>(Inst))
+ return getLocation(I);
+ else if (auto *I = dyn_cast<VAArgInst>(Inst))
+ return getLocation(I);
+ else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst))
+ return getLocation(I);
+ else if (auto *I = dyn_cast<AtomicRMWInst>(Inst))
+ return getLocation(I);
+ llvm_unreachable("unsupported memory instruction");
+ }
/// Alias analysis result - Either we know for sure that it does not alias, we
/// know for sure it must alias, or we don't know anything: The two pointers
@@ -352,6 +365,24 @@ public:
return (MRB & ModRef) && (MRB & ArgumentPointees);
}
+ /// getModRefInfo - Return information about whether or not an
+ /// instruction may read or write memory (without regard to a
+ /// specific location)
+ ModRefResult getModRefInfo(const Instruction *I) {
+ if (auto CS = ImmutableCallSite(I)) {
+ auto MRB = getModRefBehavior(CS);
+ if (MRB & ModRef)
+ return ModRef;
+ else if (MRB & Ref)
+ return Ref;
+ else if (MRB & Mod)
+ return Mod;
+ return NoModRef;
+ }
+
+ return getModRefInfo(I, Location());
+ }
+
/// getModRefInfo - Return information about whether or not an instruction may
/// read or write the specified memory location. An instruction
/// that doesn't read or write memory may be trivially LICM'd for example.
@@ -472,6 +503,10 @@ public:
ModRefResult getModRefInfo(const VAArgInst* I, const Value* P, uint64_t Size){
return getModRefInfo(I, Location(P, Size));
}
+ /// getModRefInfo - Return information about whether a call and an instruction
+ /// may refer to the same memory locations.
+ ModRefResult getModRefInfo(Instruction *I,
+ ImmutableCallSite Call);
/// getModRefInfo - Return information about whether two call sites may refer
/// to the same set of memory locations. See
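AliasAnalysis grows two instruction-level conveniences here: getLocation(const Instruction*) dispatches to the per-instruction-kind overloads, and getModRefInfo(const Instruction*) answers whether an instruction may read or write memory at all, without the caller constructing a Location. A minimal sketch of a client of the new overload; the pass plumbing that provides AA is assumed:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // True if AA can prove that I neither reads nor writes memory.
    bool neverTouchesMemory(AliasAnalysis &AA, const Instruction *I) {
      return AA.getModRefInfo(I) == AliasAnalysis::NoModRef;
    }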
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index fc1393f..1f00b69 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -165,7 +165,7 @@ public:
AssumptionCache &getAssumptionCache(Function &F);
AssumptionCacheTracker();
- ~AssumptionCacheTracker();
+ ~AssumptionCacheTracker() override;
void releaseMemory() override { AssumptionCaches.shrink_and_clear(); }
diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h
index 3289a28..f27c32d 100644
--- a/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -34,7 +34,7 @@ public:
BlockFrequencyInfo();
- ~BlockFrequencyInfo();
+ ~BlockFrequencyInfo() override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 57b5154..9acc863 100644
--- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -718,9 +718,6 @@ void IrreducibleGraph::addEdges(const BlockNode &Node,
///
/// It has some known flaws.
///
-/// - Loop scale is limited to 4096 per loop (2^12) to avoid exhausting
-/// BlockFrequency's 64-bit integer precision.
-///
/// - The model of irreducible control flow is a rough approximation.
///
/// Modelling irreducible control flow exactly involves setting up and
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index 64d288a..14b8822 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -318,7 +318,7 @@ public:
static char ID; // Class identification, replacement for typeinfo
CallGraphWrapperPass();
- virtual ~CallGraphWrapperPass();
+ ~CallGraphWrapperPass() override;
/// \brief The internal \c CallGraph around which the rest of this interface
/// is wrapped.
diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h
index ce0b899..0b3b2ea 100644
--- a/include/llvm/Analysis/DependenceAnalysis.h
+++ b/include/llvm/Analysis/DependenceAnalysis.h
@@ -219,7 +219,7 @@ namespace llvm {
public:
FullDependence(Instruction *Src, Instruction *Dst, bool LoopIndependent,
unsigned Levels);
- ~FullDependence() { delete[] DV; }
+ ~FullDependence() override { delete[] DV; }
/// isLoopIndependent - Returns true if this is a loop-independent
/// dependence.
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index fdee9f8..79ed74d 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -107,7 +107,7 @@ public:
static char ID;
InlineCostAnalysis();
- ~InlineCostAnalysis();
+ ~InlineCostAnalysis() override;
// Pass interface implementation.
void getAnalysisUsage(AnalysisUsage &AU) const override;
diff --git a/include/llvm/Analysis/JumpInstrTableInfo.h b/include/llvm/Analysis/JumpInstrTableInfo.h
index 591e794..b6dad47 100644
--- a/include/llvm/Analysis/JumpInstrTableInfo.h
+++ b/include/llvm/Analysis/JumpInstrTableInfo.h
@@ -39,7 +39,7 @@ public:
/// The default byte alignment for jump tables is 16, which is large but
/// usually safe.
JumpInstrTableInfo(uint64_t ByteAlign = 16);
- virtual ~JumpInstrTableInfo();
+ ~JumpInstrTableInfo() override;
const char *getPassName() const override {
return "Jump-Instruction Table Info";
}
diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h
index 8e5bbfb..1051cff 100644
--- a/include/llvm/Analysis/LazyValueInfo.h
+++ b/include/llvm/Analysis/LazyValueInfo.h
@@ -39,7 +39,7 @@ public:
LazyValueInfo() : FunctionPass(ID), PImpl(nullptr) {
initializeLazyValueInfoPass(*PassRegistry::getPassRegistry());
}
- ~LazyValueInfo() { assert(!PImpl && "releaseMemory not called"); }
+ ~LazyValueInfo() override { assert(!PImpl && "releaseMemory not called"); }
/// This is used to return true/false/dunno results.
enum Tristate {
diff --git a/include/llvm/Analysis/LibCallAliasAnalysis.h b/include/llvm/Analysis/LibCallAliasAnalysis.h
index 49e0dc8..df95e0e 100644
--- a/include/llvm/Analysis/LibCallAliasAnalysis.h
+++ b/include/llvm/Analysis/LibCallAliasAnalysis.h
@@ -36,8 +36,8 @@ namespace llvm {
: FunctionPass(ID), LCI(LC) {
initializeLibCallAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
- ~LibCallAliasAnalysis();
-
+ ~LibCallAliasAnalysis() override;
+
ModRefResult getModRefInfo(ImmutableCallSite CS,
const Location &Loc) override;
diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h
index 0a9dc07..a4393bb 100644
--- a/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -339,6 +339,10 @@ public:
bool needsChecking(unsigned I, unsigned J,
const SmallVectorImpl<int> *PtrPartition) const;
+ /// \brief Return true if any pointer requires run-time checking according
+ /// to needsChecking.
+ bool needsAnyChecking(const SmallVectorImpl<int> *PtrPartition) const;
+
/// \brief Print the list run-time memory checks necessary.
///
/// If \p PtrPartition is set, it contains the partition number for
@@ -428,6 +432,13 @@ public:
/// Only used in DEBUG build but we don't want NDEBUG-dependent ABI.
unsigned NumSymbolicStrides;
+ /// \brief Checks existence of store to invariant address inside loop.
+ /// If the loop has any store to invariant address, then it returns true,
+ /// else returns false.
+ bool hasStoreToLoopInvariantAddress() const {
+ return StoreToLoopInvariantAddress;
+ }
+
private:
/// \brief Analyze the loop. Substitute symbolic strides using Strides.
void analyzeLoop(const ValueToValueMap &Strides);
@@ -465,6 +476,10 @@ private:
/// \brief Cache the result of analyzeLoop.
bool CanVecMem;
+ /// \brief Indicator for storing to uniform addresses.
+ /// If a loop has write to a loop invariant address then it should be true.
+ /// If a loop has a write to a loop-invariant address then it should be true.
+ bool StoreToLoopInvariantAddress;
+
/// \brief The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
Optional<LoopAccessReport> Report;
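LoopAccessInfo now records whether the loop contains a store to a loop-invariant address and exposes it via hasStoreToLoopInvariantAddress(), alongside the new needsAnyChecking() query on the run-time check list. A sketch of how a client such as the loop vectorizer might consult it; canVectorizeMemory() is the existing LoopAccessInfo query:

    #include "llvm/Analysis/LoopAccessAnalysis.h"
    using namespace llvm;

    // Reject loops whose memory behaviour the client does not handle.
    bool memorySafeForTransform(const LoopAccessInfo &LAI) {
      if (LAI.hasStoreToLoopInvariantAddress())
        return false;              // stores to an invariant address: bail out
      return LAI.canVectorizeMemory();
    }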
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 85c2da7..f3d85e6 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -464,23 +464,20 @@ public:
/// cannot find a terminating instruction with location information,
/// it returns an unknown location.
DebugLoc getStartLoc() const {
- DebugLoc StartLoc;
BasicBlock *HeadBB;
// Try the pre-header first.
- if ((HeadBB = getLoopPreheader()) != nullptr) {
- StartLoc = HeadBB->getTerminator()->getDebugLoc();
- if (!StartLoc.isUnknown())
- return StartLoc;
- }
+ if ((HeadBB = getLoopPreheader()) != nullptr)
+ if (DebugLoc DL = HeadBB->getTerminator()->getDebugLoc())
+ return DL;
// If we have no pre-header or there are no instructions with debug
// info in it, try the header.
HeadBB = getHeader();
if (HeadBB)
- StartLoc = HeadBB->getTerminator()->getDebugLoc();
+ return HeadBB->getTerminator()->getDebugLoc();
- return StartLoc;
+ return DebugLoc();
}
private:
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 7cc4a77..0490bb1 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -409,9 +409,6 @@ class PopulateLoopsDFS {
typedef typename BlockTraits::ChildIteratorType SuccIterTy;
LoopInfoBase<BlockT, LoopT> *LI;
- DenseSet<const BlockT *> VisitedBlocks;
- std::vector<std::pair<BlockT*, SuccIterTy> > DFSStack;
-
public:
PopulateLoopsDFS(LoopInfoBase<BlockT, LoopT> *li):
LI(li) {}
@@ -420,36 +417,13 @@ public:
protected:
void insertIntoLoop(BlockT *Block);
-
- BlockT *dfsSource() { return DFSStack.back().first; }
- SuccIterTy &dfsSucc() { return DFSStack.back().second; }
- SuccIterTy dfsSuccEnd() { return BlockTraits::child_end(dfsSource()); }
-
- void pushBlock(BlockT *Block) {
- DFSStack.push_back(std::make_pair(Block, BlockTraits::child_begin(Block)));
- }
};
/// Top-level driver for the forward DFS within the loop.
template<class BlockT, class LoopT>
void PopulateLoopsDFS<BlockT, LoopT>::traverse(BlockT *EntryBlock) {
- pushBlock(EntryBlock);
- VisitedBlocks.insert(EntryBlock);
- while (!DFSStack.empty()) {
- // Traverse the leftmost path as far as possible.
- while (dfsSucc() != dfsSuccEnd()) {
- BlockT *BB = *dfsSucc();
- ++dfsSucc();
- if (!VisitedBlocks.insert(BB).second)
- continue;
-
- // Push the next DFS successor onto the stack.
- pushBlock(BB);
- }
- // Visit the top of the stack in postorder and backtrack.
- insertIntoLoop(dfsSource());
- DFSStack.pop_back();
- }
+ for (BlockT *BB : post_order(EntryBlock))
+ insertIntoLoop(BB);
}
/// Add a single Block to its ancestor loops in PostOrder. If the block is a
@@ -498,10 +472,9 @@ Analyze(DominatorTreeBase<BlockT> &DomTree) {
// Postorder traversal of the dominator tree.
DomTreeNodeBase<BlockT>* DomRoot = DomTree.getRootNode();
- for (po_iterator<DomTreeNodeBase<BlockT>*> DomIter = po_begin(DomRoot),
- DomEnd = po_end(DomRoot); DomIter != DomEnd; ++DomIter) {
+ for (auto DomNode : post_order(DomRoot)) {
- BlockT *Header = DomIter->getBlock();
+ BlockT *Header = DomNode->getBlock();
SmallVector<BlockT *, 4> Backedges;
// Check each predecessor of the potential loop header.
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index abc2b90..c8453e9 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -329,7 +329,7 @@ namespace llvm {
public:
MemoryDependenceAnalysis();
- ~MemoryDependenceAnalysis();
+ ~MemoryDependenceAnalysis() override;
static char ID;
/// Pass Implementation stuff. This doesn't do any analysis eagerly.
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index 8d11e80..d112ab1 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -138,6 +138,13 @@ namespace llvm {
//===--------------------------------------------------------------------===//
//
+ // createDivergenceAnalysisPass - This pass determines which branches in a GPU
+ // program are divergent.
+ //
+ FunctionPass *createDivergenceAnalysisPass();
+
+ //===--------------------------------------------------------------------===//
+ //
// Minor pass prototypes, allowing us to expose them through bugpoint and
// analyze.
FunctionPass *createInstCountPass();
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index 72cd357..0f7e2b8 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -30,7 +30,7 @@ struct PostDominatorTree : public FunctionPass {
DT = new DominatorTreeBase<BasicBlock>(true);
}
- ~PostDominatorTree();
+ ~PostDominatorTree() override;
bool runOnFunction(Function &F) override;
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index 1c7f4d3..7ceb086 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -820,8 +820,6 @@ public:
inline RegionNode(Region *Parent, BasicBlock *Entry, bool isSubRegion = false)
: RegionNodeBase<RegionTraits<Function>>(Parent, Entry, isSubRegion) {}
- ~RegionNode() {}
-
bool operator==(const Region &RN) const {
return this == reinterpret_cast<const RegionNode *>(&RN);
}
@@ -842,7 +840,7 @@ class RegionInfo : public RegionInfoBase<RegionTraits<Function>> {
public:
explicit RegionInfo();
- virtual ~RegionInfo();
+ ~RegionInfo() override;
// updateStatistics - Update statistic about created regions.
void updateStatistics(Region *R) final;
@@ -858,7 +856,7 @@ public:
static char ID;
explicit RegionInfoPass();
- ~RegionInfoPass();
+ ~RegionInfoPass() override;
RegionInfo &getRegionInfo() { return RI; }
diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h
index b0dc263..b31eefc 100644
--- a/include/llvm/Analysis/RegionInfoImpl.h
+++ b/include/llvm/Analysis/RegionInfoImpl.h
@@ -487,7 +487,7 @@ void RegionBase<Tr>::print(raw_ostream &OS, bool print_tree, unsigned level,
OS.indent(level * 2 + 2);
if (Style == PrintBB) {
- for (const auto &BB : blocks())
+ for (const auto *BB : blocks())
OS << BB->getName() << ", "; // TODO: remove the last ","
} else if (Style == PrintRN) {
for (const_element_iterator I = element_begin(), E = element_end();
@@ -714,10 +714,8 @@ void RegionInfoBase<Tr>::scanForRegions(FuncT &F, BBtoBBMap *ShortCut) {
// regions from the bottom of the dominance tree. If the small regions are
// detected first, detection of bigger regions is faster, as we can jump
// over the small regions.
- for (po_iterator<DomTreeNodeT *> FI = po_begin(N), FE = po_end(N); FI != FE;
- ++FI) {
- findRegionsWithEntry(FI->getBlock(), ShortCut);
- }
+ for (auto DomNode : post_order(N))
+ findRegionsWithEntry(DomNode->getBlock(), ShortCut);
}
template <class Tr>
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 4360414..1240b85 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -256,6 +256,10 @@ namespace llvm {
/// Mark predicate values currently being processed by isImpliedCond.
DenseSet<Value*> PendingLoopPredicates;
+ /// Set to true by isLoopBackedgeGuardedByCond when we're walking the set of
+ /// conditions dominating the backedge of a loop.
+ bool WalkingBEDominatingConds;
+
/// ExitLimit - Information about the number of loop iterations for which a
/// loop exit's branch condition evaluates to the not-taken path. This is a
/// temporary pair of exact and max expressions that are eventually
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index b0b0946..8ec2078 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -28,7 +28,7 @@ namespace llvm {
/// all materialized values are safe to speculate.
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE);
- /// SCEVExpander - This class uses information about analyze scalars to
+ /// This class uses information about analyzed scalars to
/// rewrite expressions in canonical form.
///
/// Clients should create an instance of this class when rewriting is needed,
@@ -48,37 +48,36 @@ namespace llvm {
std::set<AssertingVH<Value> > InsertedValues;
std::set<AssertingVH<Value> > InsertedPostIncValues;
- /// RelevantLoops - A memoization of the "relevant" loop for a given SCEV.
+ /// A memoization of the "relevant" loop for a given SCEV.
DenseMap<const SCEV *, const Loop *> RelevantLoops;
- /// PostIncLoops - Addrecs referring to any of the given loops are expanded
+ /// \brief Addrecs referring to any of the given loops are expanded
/// in post-inc mode. For example, expanding {1,+,1}<L> in post-inc mode
/// returns the add instruction that adds one to the phi for {0,+,1}<L>,
/// as opposed to a new phi starting at 1. This is only supported in
/// non-canonical mode.
PostIncLoopSet PostIncLoops;
- /// IVIncInsertPos - When this is non-null, addrecs expanded in the
- /// loop it indicates should be inserted with increments at
- /// IVIncInsertPos.
+ /// \brief When this is non-null, addrecs expanded in the loop it indicates
+ /// should be inserted with increments at IVIncInsertPos.
const Loop *IVIncInsertLoop;
- /// IVIncInsertPos - When expanding addrecs in the IVIncInsertLoop loop,
- /// insert the IV increment at this position.
+ /// \brief When expanding addrecs in the IVIncInsertLoop loop, insert the IV
+ /// increment at this position.
Instruction *IVIncInsertPos;
- /// Phis that complete an IV chain. Reuse
+ /// \brief Phis that complete an IV chain. Reuse
std::set<AssertingVH<PHINode> > ChainedPhis;
- /// CanonicalMode - When true, expressions are expanded in "canonical"
- /// form. In particular, addrecs are expanded as arithmetic based on
- /// a canonical induction variable. When false, expression are expanded
- /// in a more literal form.
+ /// \brief When true, expressions are expanded in "canonical" form. In
+ /// particular, addrecs are expanded as arithmetic based on a canonical
+ /// induction variable. When false, expressions are expanded in a more
+ /// literal form.
bool CanonicalMode;
- /// When invoked from LSR, the expander is in "strength reduction" mode. The
- /// only difference is that phi's are only reused if they are already in
- /// "expanded" form.
+ /// \brief When invoked from LSR, the expander is in "strength reduction"
+ /// mode. The only difference is that phi's are only reused if they are
+ /// already in "expanded" form.
bool LSRMode;
typedef IRBuilder<true, TargetFolder> BuilderType;
@@ -91,7 +90,7 @@ namespace llvm {
friend struct SCEVVisitor<SCEVExpander, Value*>;
public:
- /// SCEVExpander - Construct a SCEVExpander in "canonical" mode.
+ /// \brief Construct a SCEVExpander in "canonical" mode.
explicit SCEVExpander(ScalarEvolution &se, const DataLayout &DL,
const char *name)
: SE(se), DL(DL), IVName(name), IVIncInsertLoop(nullptr),
@@ -106,7 +105,7 @@ namespace llvm {
void setDebugType(const char* s) { DebugType = s; }
#endif
- /// clear - Erase the contents of the InsertedExpressions map so that users
+ /// \brief Erase the contents of the InsertedExpressions map so that users
/// trying to expand the same expression into multiple BasicBlocks or
/// different places within the same BasicBlock can do so.
void clear() {
@@ -116,31 +115,38 @@ namespace llvm {
ChainedPhis.clear();
}
- /// getOrInsertCanonicalInductionVariable - This method returns the
- /// canonical induction variable of the specified type for the specified
- /// loop (inserting one if there is none). A canonical induction variable
- /// starts at zero and steps by one on each iteration.
+ /// \brief Return true for expressions that may incur non-trivial cost to
+ /// evaluate at runtime.
+ bool isHighCostExpansion(const SCEV *Expr, Loop *L) {
+ SmallPtrSet<const SCEV *, 8> Processed;
+ return isHighCostExpansionHelper(Expr, L, Processed);
+ }
+
+ /// \brief This method returns the canonical induction variable of the
+ /// specified type for the specified loop (inserting one if there is none).
+ /// A canonical induction variable starts at zero and steps by one on each
+ /// iteration.
PHINode *getOrInsertCanonicalInductionVariable(const Loop *L, Type *Ty);
- /// getIVIncOperand - Return the induction variable increment's IV operand.
+ /// \brief Return the induction variable increment's IV operand.
Instruction *getIVIncOperand(Instruction *IncV, Instruction *InsertPos,
bool allowScale);
- /// hoistIVInc - Utility for hoisting an IV increment.
+ /// \brief Utility for hoisting an IV increment.
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos);
- /// replaceCongruentIVs - replace congruent phis with their most canonical
+ /// \brief replace congruent phis with their most canonical
/// representative. Return the number of phis eliminated.
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT,
SmallVectorImpl<WeakVH> &DeadInsts,
const TargetTransformInfo *TTI = nullptr);
- /// expandCodeFor - Insert code to directly compute the specified SCEV
- /// expression into the program. The inserted code is inserted into the
- /// specified block.
+ /// \brief Insert code to directly compute the specified SCEV expression
+ /// into the program. The inserted code is inserted into the specified
+ /// block.
Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I);
- /// setIVIncInsertPos - Set the current IV increment loop and position.
+ /// \brief Set the current IV increment loop and position.
void setIVIncInsertPos(const Loop *L, Instruction *Pos) {
assert(!CanonicalMode &&
"IV increment positions are not supported in CanonicalMode");
@@ -148,16 +154,15 @@ namespace llvm {
IVIncInsertPos = Pos;
}
- /// setPostInc - Enable post-inc expansion for addrecs referring to the
- /// given loops. Post-inc expansion is only supported in non-canonical
- /// mode.
+ /// \brief Enable post-inc expansion for addrecs referring to the given
+ /// loops. Post-inc expansion is only supported in non-canonical mode.
void setPostInc(const PostIncLoopSet &L) {
assert(!CanonicalMode &&
"Post-inc expansion is not supported in CanonicalMode");
PostIncLoops = L;
}
- /// clearPostInc - Disable all post-inc expansion.
+ /// \brief Disable all post-inc expansion.
void clearPostInc() {
PostIncLoops.clear();
@@ -166,23 +171,22 @@ namespace llvm {
InsertedPostIncValues.clear();
}
- /// disableCanonicalMode - Disable the behavior of expanding expressions in
- /// canonical form rather than in a more literal form. Non-canonical mode
- /// is useful for late optimization passes.
+ /// \brief Disable the behavior of expanding expressions in canonical form
+ /// rather than in a more literal form. Non-canonical mode is useful for
+ /// late optimization passes.
void disableCanonicalMode() { CanonicalMode = false; }
void enableLSRMode() { LSRMode = true; }
- /// clearInsertPoint - Clear the current insertion point. This is useful
- /// if the instruction that had been serving as the insertion point may
- /// have been deleted.
+ /// \brief Clear the current insertion point. This is useful if the
+ /// instruction that had been serving as the insertion point may have been
+ /// deleted.
void clearInsertPoint() {
Builder.ClearInsertionPoint();
}
- /// isInsertedInstruction - Return true if the specified instruction was
- /// inserted by the code rewriter. If so, the client should not modify the
- /// instruction.
+ /// \brief Return true if the specified instruction was inserted by the code
+ /// rewriter. If so, the client should not modify the instruction.
bool isInsertedInstruction(Instruction *I) const {
return InsertedValues.count(I) || InsertedPostIncValues.count(I);
}
@@ -192,24 +196,27 @@ namespace llvm {
private:
LLVMContext &getContext() const { return SE.getContext(); }
- /// InsertBinop - Insert the specified binary operator, doing a small amount
+ /// \brief Recursive helper function for isHighCostExpansion.
+ bool isHighCostExpansionHelper(const SCEV *S, Loop *L,
+ SmallPtrSetImpl<const SCEV *> &Processed);
+
+ /// \brief Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation.
Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS);
- /// ReuseOrCreateCast - Arange for there to be a cast of V to Ty at IP,
- /// reusing an existing cast if a suitable one exists, moving an existing
- /// cast if a suitable one exists but isn't in the right place, or
- /// or creating a new one.
+ /// \brief Arrange for there to be a cast of V to Ty at IP, reusing an
+ /// existing cast if a suitable one exists, moving an existing cast if a
+ /// suitable one exists but isn't in the right place, or creating a new
+ /// one.
Value *ReuseOrCreateCast(Value *V, Type *Ty,
Instruction::CastOps Op,
BasicBlock::iterator IP);
- /// InsertNoopCastOfTo - Insert a cast of V to the specified type,
- /// which must be possible with a noop cast, doing what we can to
- /// share the casts.
+ /// \brief Insert a cast of V to the specified type, which must be possible
+ /// with a noop cast, doing what we can to share the casts.
Value *InsertNoopCastOfTo(Value *V, Type *Ty);
- /// expandAddToGEP - Expand a SCEVAddExpr with a pointer type into a GEP
+ /// \brief Expand a SCEVAddExpr with a pointer type into a GEP
/// instead of using ptrtoint+arithmetic+inttoptr.
Value *expandAddToGEP(const SCEV *const *op_begin,
const SCEV *const *op_end,
@@ -217,13 +224,13 @@ namespace llvm {
Value *expand(const SCEV *S);
- /// expandCodeFor - Insert code to directly compute the specified SCEV
- /// expression into the program. The inserted code is inserted into the
- /// SCEVExpander's current insertion point. If a type is specified, the
- /// result will be expanded to have that type, with a cast if necessary.
+ /// \brief Insert code to directly compute the specified SCEV expression
+ /// into the program. The inserted code is inserted into the SCEVExpander's
+ /// current insertion point. If a type is specified, the result will be
+ /// expanded to have that type, with a cast if necessary.
Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);
- /// getRelevantLoop - Determine the most "relevant" loop for the given SCEV.
+ /// \brief Determine the most "relevant" loop for the given SCEV.
const Loop *getRelevantLoop(const SCEV *);
Value *visitConstant(const SCEVConstant *S) {
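For illustration, a minimal sketch of how a client pass might drive the SCEVExpander entry points documented above (expandCodeFor, disableCanonicalMode); the constructor form and the helper name materializeTripCount are assumptions made for this sketch, not part of the change.

  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Analysis/ScalarEvolutionExpander.h"
  using namespace llvm;

  // Expand the loop's backedge-taken count as IR right before InsertPt.
  static Value *materializeTripCount(ScalarEvolution &SE, Loop *L,
                                     Instruction *InsertPt) {
    const SCEV *Count = SE.getBackedgeTakenCount(L);
    SCEVExpander Expander(SE, "trip-count");   // constructor form assumed
    // Late passes usually want the literal form rather than canonical IVs.
    Expander.disableCanonicalMode();
    return Expander.expandCodeFor(Count, Count->getType(), InsertPt);
  }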
diff --git a/include/llvm/Analysis/TargetFolder.h b/include/llvm/Analysis/TargetFolder.h
index f691296..12bf9fe 100644
--- a/include/llvm/Analysis/TargetFolder.h
+++ b/include/llvm/Analysis/TargetFolder.h
@@ -130,34 +130,35 @@ public:
// Memory Instructions
//===--------------------------------------------------------------------===//
- Constant *CreateGetElementPtr(Constant *C,
+ Constant *CreateGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Constant *> IdxList) const {
- return Fold(ConstantExpr::getGetElementPtr(C, IdxList));
+ return Fold(ConstantExpr::getGetElementPtr(Ty, C, IdxList));
}
- Constant *CreateGetElementPtr(Constant *C, Constant *Idx) const {
+ Constant *CreateGetElementPtr(Type *Ty, Constant *C, Constant *Idx) const {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
- return Fold(ConstantExpr::getGetElementPtr(C, Idx));
+ return Fold(ConstantExpr::getGetElementPtr(Ty, C, Idx));
}
- Constant *CreateGetElementPtr(Constant *C,
+ Constant *CreateGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> IdxList) const {
- return Fold(ConstantExpr::getGetElementPtr(C, IdxList));
+ return Fold(ConstantExpr::getGetElementPtr(Ty, C, IdxList));
}
- Constant *CreateInBoundsGetElementPtr(Constant *C,
+ Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Constant *> IdxList) const {
- return Fold(ConstantExpr::getInBoundsGetElementPtr(C, IdxList));
+ return Fold(ConstantExpr::getInBoundsGetElementPtr(Ty, C, IdxList));
}
- Constant *CreateInBoundsGetElementPtr(Constant *C, Constant *Idx) const {
+ Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
+ Constant *Idx) const {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
- return Fold(ConstantExpr::getInBoundsGetElementPtr(C, Idx));
+ return Fold(ConstantExpr::getInBoundsGetElementPtr(Ty, C, Idx));
}
- Constant *CreateInBoundsGetElementPtr(Constant *C,
+ Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> IdxList) const {
- return Fold(ConstantExpr::getInBoundsGetElementPtr(C, IdxList));
+ return Fold(ConstantExpr::getInBoundsGetElementPtr(Ty, C, IdxList));
}
//===--------------------------------------------------------------------===//
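For context, the CreateGetElementPtr overloads above now pass the source element type through to ConstantExpr::getGetElementPtr explicitly. A hedged sketch of an updated call site follows; Ctx, TF and GV stand in for an LLVMContext, a TargetFolder and a GlobalVariable from the caller, and are not names introduced by this change.

  // Build a constant GEP to the first element, passing the pointee type.
  Constant *Zero = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
  Type *EltTy = cast<PointerType>(GV->getType())->getElementType();
  Constant *Indices[] = {Zero, Zero};
  Constant *FirstElt = TF.CreateGetElementPtr(EltTy, GV, Indices);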
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index fdb2010..f4195fb 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -190,12 +190,21 @@ public:
/// comments for a detailed explanation of the cost values.
unsigned getUserCost(const User *U) const;
- /// \brief hasBranchDivergence - Return true if branch divergence exists.
+ /// \brief Return true if branch divergence exists.
+ ///
/// Branch divergence has a significantly negative impact on GPU performance
/// when threads in the same wavefront take different paths due to conditional
/// branches.
bool hasBranchDivergence() const;
+ /// \brief Returns whether V is a source of divergence.
+ ///
+ /// This function provides the target-dependent information for
+ /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
+ /// builds the dependency graph, and then runs the reachability algorithm
+ /// starting with the sources of divergence.
+ bool isSourceOfDivergence(const Value *V) const;
+
/// \brief Test whether calls to a function lower to actual program function
/// calls.
///
@@ -252,6 +261,9 @@ public:
/// loop body even when the number of loop iterations is not known at
/// compile time).
bool Runtime;
+ /// Allow emitting expensive instructions (such as divisions) when computing
+ /// the trip count of a loop for runtime unrolling.
+ bool AllowExpensiveTripCount;
};
/// \brief Get target-customized preferences for the generic loop unrolling
@@ -520,6 +532,7 @@ public:
ArrayRef<const Value *> Arguments) = 0;
virtual unsigned getUserCost(const User *U) = 0;
virtual bool hasBranchDivergence() = 0;
+ virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
@@ -619,6 +632,9 @@ public:
}
unsigned getUserCost(const User *U) override { return Impl.getUserCost(U); }
bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
+ bool isSourceOfDivergence(const Value *V) override {
+ return Impl.isSourceOfDivergence(V);
+ }
bool isLoweredToCall(const Function *F) override {
return Impl.isLoweredToCall(F);
}
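The new isSourceOfDivergence hook feeds the target-independent DivergenceAnalysis mentioned above. Below is a rough, assumption-laden sketch of how a GPU-like TTI implementation could answer it; MyGPUTTIImpl and the heuristics shown are placeholders, not what any in-tree target actually does.

  // Hypothetical override in a target's TTI implementation class.
  bool MyGPUTTIImpl::isSourceOfDivergence(const Value *V) {
    // Results of atomic operations differ per lane.
    if (isa<AtomicRMWInst>(V) || isa<AtomicCmpXchgInst>(V))
      return true;
    // Treat pointer arguments as uniform and other arguments as divergent
    // (illustrative heuristic only).
    if (const Argument *A = dyn_cast<Argument>(V))
      return !A->getType()->isPointerTy();
    return false;
  }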
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index f4bf07f..b00de77 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -164,6 +164,8 @@ public:
bool hasBranchDivergence() { return false; }
+ bool isSourceOfDivergence(const Value *V) { return false; }
+
bool isLoweredToCall(const Function *F) {
// FIXME: These should almost certainly not be handled here, and instead
// handled with the help of TLI or the target itself. This was largely
@@ -408,7 +410,7 @@ public:
->getGEPCost(GEP->getPointerOperand(), Indices);
}
- if (ImmutableCallSite CS = U) {
+ if (auto CS = ImmutableCallSite(U)) {
const Function *F = CS.getCalledFunction();
if (!F) {
// Just use the called value type.
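The hunk above replaces the implicit User-to-CallSite conversion with an explicit construction. A small sketch of the resulting idiom, where U is assumed to be a const User * from the enclosing cost computation:

  if (auto CS = ImmutableCallSite(U)) {
    if (const Function *Callee = CS.getCalledFunction())
      (void)Callee->arg_size();   // e.g. inspect the callee's prototype
  }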
diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h
index 3f7a77d..96c4201 100644
--- a/include/llvm/Bitcode/BitCodes.h
+++ b/include/llvm/Bitcode/BitCodes.h
@@ -164,8 +164,8 @@ template <> struct isPodLike<BitCodeAbbrevOp> { static const bool value=true; };
/// specialized format instead of the fully-general, fully-vbr, format.
class BitCodeAbbrev : public RefCountedBase<BitCodeAbbrev> {
SmallVector<BitCodeAbbrevOp, 32> OperandList;
- ~BitCodeAbbrev() {}
// Only RefCountedBase is allowed to delete.
+ ~BitCodeAbbrev() = default;
friend class RefCountedBase<BitCodeAbbrev>;
public:
diff --git a/include/llvm/Bitcode/BitcodeWriterPass.h b/include/llvm/Bitcode/BitcodeWriterPass.h
index 8fe9b7e..ae915c6 100644
--- a/include/llvm/Bitcode/BitcodeWriterPass.h
+++ b/include/llvm/Bitcode/BitcodeWriterPass.h
@@ -26,7 +26,11 @@ class PreservedAnalyses;
/// \brief Create and return a pass that writes the module to the specified
/// ostream. Note that this pass is designed for use with the legacy pass
/// manager.
-ModulePass *createBitcodeWriterPass(raw_ostream &Str);
+///
+/// If \c ShouldPreserveUseListOrder, encode use-list order so it can be
+/// reproduced when deserialized.
+ModulePass *createBitcodeWriterPass(raw_ostream &Str,
+ bool ShouldPreserveUseListOrder = false);
/// \brief Pass for writing a module of IR out to a bitcode file.
///
@@ -34,10 +38,16 @@ ModulePass *createBitcodeWriterPass(raw_ostream &Str);
/// a pass for the legacy pass manager, use the function above.
class BitcodeWriterPass {
raw_ostream &OS;
+ bool ShouldPreserveUseListOrder;
public:
/// \brief Construct a bitcode writer pass around a particular output stream.
- explicit BitcodeWriterPass(raw_ostream &OS) : OS(OS) {}
+ ///
+ /// If \c ShouldPreserveUseListOrder, encode use-list order so it can be
+ /// reproduced when deserialized.
+ explicit BitcodeWriterPass(raw_ostream &OS,
+ bool ShouldPreserveUseListOrder = false)
+ : OS(OS), ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {}
/// \brief Run the bitcode writer pass, and output the module to the selected
/// output stream.
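As a usage sketch for the new flag on the legacy-pass-manager side; M and Out are assumed to be a Module and an already-opened raw_ostream:

  #include "llvm/IR/LegacyPassManager.h"

  legacy::PassManager PM;
  PM.add(createBitcodeWriterPass(Out, /*ShouldPreserveUseListOrder=*/true));
  PM.run(M);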
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index d842167..e450db0 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -397,7 +397,8 @@ namespace bitc {
ATTR_KIND_IN_ALLOCA = 38,
ATTR_KIND_NON_NULL = 39,
ATTR_KIND_JUMP_TABLE = 40,
- ATTR_KIND_DEREFERENCEABLE = 41
+ ATTR_KIND_DEREFERENCEABLE = 41,
+ ATTR_KIND_DEREFERENCEABLE_OR_NULL = 42
};
enum ComdatSelectionKindCodes {
diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h
index 254949d..9d30098 100644
--- a/include/llvm/Bitcode/ReaderWriter.h
+++ b/include/llvm/Bitcode/ReaderWriter.h
@@ -56,11 +56,16 @@ namespace llvm {
parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context,
DiagnosticHandlerFunction DiagnosticHandler = nullptr);
- /// WriteBitcodeToFile - Write the specified module to the specified
- /// raw output stream. For streams where it matters, the given stream
- /// should be in "binary" mode.
- void WriteBitcodeToFile(const Module *M, raw_ostream &Out);
-
+ /// \brief Write the specified module to the specified raw output stream.
+ ///
+ /// For streams where it matters, the given stream should be in "binary"
+ /// mode.
+ ///
+ /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a
+ /// Value in \c M. These will be reconstructed exactly when \a M is
+ /// deserialized.
+ void WriteBitcodeToFile(const Module *M, raw_ostream &Out,
+ bool ShouldPreserveUseListOrder = false);
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
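A corresponding sketch for the free function; the output file name and the error handling are illustrative only:

  std::error_code EC;
  raw_fd_ostream Out("module.bc", EC, sys::fs::F_None);
  if (!EC)
    WriteBitcodeToFile(&M, Out, /*ShouldPreserveUseListOrder=*/true);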
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index d364012..11f98ca 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -140,7 +140,7 @@ protected:
explicit AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
public:
- virtual ~AsmPrinter();
+ ~AsmPrinter() override;
DwarfDebug *getDwarfDebug() { return DD; }
DwarfDebug *getDwarfDebug() const { return DD; }
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index e1e5112..c5efef3 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -114,6 +114,8 @@ public:
bool hasBranchDivergence() { return false; }
+ bool isSourceOfDivergence(const Value *V) { return false; }
+
bool isLegalAddImmediate(int64_t imm) {
return getTLI()->isLegalAddImmediate(imm);
}
diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h
index 869f888..a1b8e89 100644
--- a/include/llvm/CodeGen/GCStrategy.h
+++ b/include/llvm/CodeGen/GCStrategy.h
@@ -60,11 +60,11 @@
namespace llvm {
namespace GC {
-/// PointKind - The type of a collector-safe point.
+/// PointKind - Used to indicate whether the address of the call instruction
+/// or the address after the call instruction is listed in the stackmap. For
+/// most runtimes, PostCall safepoints are appropriate.
///
enum PointKind {
- Loop, ///< Instr is a loop (backwards branch).
- Return, ///< Instr is a return instruction.
PreCall, ///< Instr is a call instruction.
PostCall ///< Instr is the return address of a call.
};
diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h
index b3a8405..aa217d5 100644
--- a/include/llvm/CodeGen/LexicalScopes.h
+++ b/include/llvm/CodeGen/LexicalScopes.h
@@ -23,7 +23,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/Metadata.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/ValueHandle.h"
#include <unordered_map>
#include <utility>
@@ -45,7 +45,8 @@ typedef std::pair<const MachineInstr *, const MachineInstr *> InsnRange;
class LexicalScope {
public:
- LexicalScope(LexicalScope *P, const MDNode *D, const MDNode *I, bool A)
+ LexicalScope(LexicalScope *P, const MDLocalScope *D, const MDLocation *I,
+ bool A)
: Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A),
LastInsn(nullptr), FirstInsn(nullptr), DFSIn(0), DFSOut(0) {
assert((!D || D->isResolved()) && "Expected resolved node");
@@ -57,8 +58,8 @@ public:
// Accessors.
LexicalScope *getParent() const { return Parent; }
const MDNode *getDesc() const { return Desc; }
- const MDNode *getInlinedAt() const { return InlinedAtLocation; }
- const MDNode *getScopeNode() const { return Desc; }
+ const MDLocation *getInlinedAt() const { return InlinedAtLocation; }
+ const MDLocalScope *getScopeNode() const { return Desc; }
bool isAbstractScope() const { return AbstractScope; }
SmallVectorImpl<LexicalScope *> &getChildren() { return Children; }
SmallVectorImpl<InsnRange> &getRanges() { return Ranges; }
@@ -118,8 +119,8 @@ public:
private:
LexicalScope *Parent; // Parent to this scope.
- const MDNode *Desc; // Debug info descriptor.
- const MDNode *InlinedAtLocation; // Location at which this
+ const MDLocalScope *Desc; // Debug info descriptor.
+ const MDLocation *InlinedAtLocation; // Location at which this
// scope is inlined.
bool AbstractScope; // Abstract Scope
SmallVector<LexicalScope *, 4> Children; // Scopes defined in scope.
@@ -158,16 +159,16 @@ public:
/// getMachineBasicBlocks - Populate given set using machine basic blocks
/// which have machine instructions that belong to lexical scope identified by
/// DebugLoc.
- void getMachineBasicBlocks(DebugLoc DL,
+ void getMachineBasicBlocks(const MDLocation *DL,
SmallPtrSetImpl<const MachineBasicBlock *> &MBBs);
/// dominates - Return true if DebugLoc's lexical scope dominates at least one
/// machine instruction's lexical scope in a given machine basic block.
- bool dominates(DebugLoc DL, MachineBasicBlock *MBB);
+ bool dominates(const MDLocation *DL, MachineBasicBlock *MBB);
/// findLexicalScope - Find lexical scope, either regular or inlined, for the
/// given DebugLoc. Return NULL if not found.
- LexicalScope *findLexicalScope(DebugLoc DL);
+ LexicalScope *findLexicalScope(const MDLocation *DL);
/// getAbstractScopesList - Return a reference to list of abstract scopes.
ArrayRef<LexicalScope *> getAbstractScopesList() const {
@@ -175,19 +176,19 @@ public:
}
/// findAbstractScope - Find an abstract scope or return null.
- LexicalScope *findAbstractScope(const MDNode *N) {
+ LexicalScope *findAbstractScope(const MDLocalScope *N) {
auto I = AbstractScopeMap.find(N);
return I != AbstractScopeMap.end() ? &I->second : nullptr;
}
/// findInlinedScope - Find an inlined scope for the given scope/inlined-at.
- LexicalScope *findInlinedScope(const MDNode *N, const MDNode *IA) {
+ LexicalScope *findInlinedScope(const MDLocalScope *N, const MDLocation *IA) {
auto I = InlinedLexicalScopeMap.find(std::make_pair(N, IA));
return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr;
}
/// findLexicalScope - Find regular lexical scope or return null.
- LexicalScope *findLexicalScope(const MDNode *N) {
+ LexicalScope *findLexicalScope(const MDLocalScope *N) {
auto I = LexicalScopeMap.find(N);
return I != LexicalScopeMap.end() ? &I->second : nullptr;
}
@@ -196,18 +197,24 @@ public:
void dump();
/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
- LexicalScope *getOrCreateAbstractScope(const MDNode *N);
+ LexicalScope *getOrCreateAbstractScope(const MDLocalScope *Scope);
private:
- /// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
+ /// getOrCreateLexicalScope - Find lexical scope for the given Scope/IA. If
/// not available then create new lexical scope.
- LexicalScope *getOrCreateLexicalScope(DebugLoc DL);
+ LexicalScope *getOrCreateLexicalScope(const MDLocalScope *Scope,
+ const MDLocation *IA = nullptr);
+ LexicalScope *getOrCreateLexicalScope(const MDLocation *DL) {
+ return DL ? getOrCreateLexicalScope(DL->getScope(), DL->getInlinedAt())
+ : nullptr;
+ }
/// getOrCreateRegularScope - Find or create a regular lexical scope.
- LexicalScope *getOrCreateRegularScope(MDNode *Scope);
+ LexicalScope *getOrCreateRegularScope(const MDLocalScope *Scope);
/// getOrCreateInlinedScope - Find or create an inlined lexical scope.
- LexicalScope *getOrCreateInlinedScope(MDNode *Scope, MDNode *InlinedAt);
+ LexicalScope *getOrCreateInlinedScope(const MDLocalScope *Scope,
+ const MDLocation *InlinedAt);
/// extractLexicalScopes - Extract instruction ranges for each lexical scopes
/// for the given machine function.
@@ -223,17 +230,18 @@ private:
/// LexicalScopeMap - Tracks the scopes in the current function.
// Use an unordered_map to ensure value pointer validity over insertion.
- std::unordered_map<const MDNode *, LexicalScope> LexicalScopeMap;
+ std::unordered_map<const MDLocalScope *, LexicalScope> LexicalScopeMap;
/// InlinedLexicalScopeMap - Tracks inlined function scopes in current
/// function.
- std::unordered_map<std::pair<const MDNode *, const MDNode *>, LexicalScope,
- pair_hash<const MDNode *, const MDNode *>>
- InlinedLexicalScopeMap;
+ std::unordered_map<std::pair<const MDLocalScope *, const MDLocation *>,
+ LexicalScope,
+ pair_hash<const MDLocalScope *, const MDLocation *>>
+ InlinedLexicalScopeMap;
/// AbstractScopeMap - These scopes are not included in LexicalScopeMap.
// Use an unordered_map to ensure value pointer validity over insertion.
- std::unordered_map<const MDNode *, LexicalScope> AbstractScopeMap;
+ std::unordered_map<const MDLocalScope *, LexicalScope> AbstractScopeMap;
/// AbstractScopesList - Tracks abstract scopes constructed while processing
/// a function.
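With the stronger typing above, scope lookups are keyed by MDLocation/MDLocalScope rather than raw MDNode. A hedged sketch, where LS is an initialized LexicalScopes and Loc a resolved const MDLocation * taken from a machine instruction's debug location:

  if (LexicalScope *Scope = LS.findLexicalScope(Loc)) {
    const MDLocalScope *Desc = Scope->getScopeNode();  // now strongly typed
    bool Abstract = Scope->isAbstractScope();
    (void)Desc; (void)Abstract;
  }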
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index dc52c0a..9673f80 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -100,7 +100,7 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs;
public:
static char ID; // Pass identification, replacement for typeid
LiveIntervals();
- virtual ~LiveIntervals();
+ ~LiveIntervals() override;
// Calculate the spill weight to assign to a single instruction.
static float getSpillWeight(bool isDef, bool isUse,
diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h
index 44c3c4e..de855f2 100644
--- a/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/include/llvm/CodeGen/LiveRangeEdit.h
@@ -122,7 +122,7 @@ public:
MRI.setDelegate(this);
}
- ~LiveRangeEdit() { MRI.resetDelegate(this); }
+ ~LiveRangeEdit() override { MRI.resetDelegate(this); }
LiveInterval &getParent() const {
assert(Parent && "No parent LiveInterval");
diff --git a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index 1aef689..feb394e 100644
--- a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -35,7 +35,7 @@ public:
MachineBlockFrequencyInfo();
- ~MachineBlockFrequencyInfo();
+ ~MachineBlockFrequencyInfo() override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index 19f4e2d..4428fa6 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -72,7 +72,7 @@ public:
MachineDominatorTree();
- ~MachineDominatorTree();
+ ~MachineDominatorTree() override;
DominatorTreeBase<MachineBasicBlock> &getBase() {
applySplitCriticalEdges();
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 6677360..1e7fee6 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -516,10 +516,6 @@ public:
/// on the stack. Returns an index with a negative value.
int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset);
- /// Allocates memory at a fixed, target-specific offset from the frame
- /// pointer. Marks the function as having its frame address taken.
- int CreateFrameAllocation(uint64_t Size);
-
/// isFixedObjectIndex - Returns true if the specified index corresponds to a
/// fixed stack object.
bool isFixedObjectIndex(int ObjectIdx) const {
diff --git a/include/llvm/CodeGen/MachineFunctionAnalysis.h b/include/llvm/CodeGen/MachineFunctionAnalysis.h
index 36f1c66..023eeb1 100644
--- a/include/llvm/CodeGen/MachineFunctionAnalysis.h
+++ b/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -31,7 +31,7 @@ private:
public:
static char ID;
explicit MachineFunctionAnalysis(const TargetMachine &tm);
- ~MachineFunctionAnalysis();
+ ~MachineFunctionAnalysis() override;
MachineFunction &getMF() const { return *MF; }
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 333dcdb..9097150 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -248,9 +248,7 @@ public:
/// this DBG_VALUE instruction.
DIVariable getDebugVariable() const {
assert(isDebugValue() && "not a DBG_VALUE");
- DIVariable Var(getOperand(2).getMetadata());
- assert(Var.Verify() && "not a DIVariable");
- return Var;
+ return cast<MDLocalVariable>(getOperand(2).getMetadata());
}
/// \brief Return the complex address expression referenced by
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index e6cb494..0574ebc 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -174,7 +174,8 @@ public:
const MachineInstrBuilder &addMetadata(const MDNode *MD) const {
MI->addOperand(*MF, MachineOperand::CreateMetadata(MD));
- assert((MI->isDebugValue() ? MI->getDebugVariable().Verify() : true) &&
+ assert((MI->isDebugValue() ? static_cast<bool>(MI->getDebugVariable())
+ : true) &&
"first MDNode argument of a DBG_VALUE not a DIVariable");
return *this;
}
@@ -355,8 +356,10 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL,
const MCInstrDesc &MCID, bool IsIndirect,
unsigned Reg, unsigned Offset,
const MDNode *Variable, const MDNode *Expr) {
- assert(DIVariable(Variable).Verify() && "not a DIVariable");
- assert(DIExpression(Expr)->isValid() && "not a DIExpression");
+ assert(isa<MDLocalVariable>(Variable) && "not a DIVariable");
+ assert(cast<MDExpression>(Expr)->isValid() && "not a DIExpression");
+ assert(cast<MDLocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
if (IsIndirect)
return BuildMI(MF, DL, MCID)
.addReg(Reg, RegState::Debug)
@@ -382,8 +385,8 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
const MCInstrDesc &MCID, bool IsIndirect,
unsigned Reg, unsigned Offset,
const MDNode *Variable, const MDNode *Expr) {
- assert(DIVariable(Variable).Verify() && "not a DIVariable");
- assert(DIExpression(Expr)->isValid() && "not a DIExpression");
+ assert(isa<MDLocalVariable>(Variable) && "not a DIVariable");
+ assert(cast<MDExpression>(Expr)->isValid() && "not a DIExpression");
MachineFunction &MF = *BB.getParent();
MachineInstr *MI =
BuildMI(MF, DL, MCID, IsIndirect, Reg, Offset, Variable, Expr);
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index f171df2..3965b1d 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -58,22 +58,25 @@ class MachineFunction;
class Module;
class PointerType;
class StructType;
+struct WinEHFuncInfo;
//===----------------------------------------------------------------------===//
/// LandingPadInfo - This structure is used to retain landing pad info for
/// the current function.
///
struct LandingPadInfo {
- MachineBasicBlock *LandingPadBlock; // Landing pad block.
- SmallVector<MCSymbol*, 1> BeginLabels; // Labels prior to invoke.
- SmallVector<MCSymbol*, 1> EndLabels; // Labels after invoke.
- SmallVector<MCSymbol*, 1> ClauseLabels; // Labels for each clause.
- MCSymbol *LandingPadLabel; // Label at beginning of landing pad.
- const Function *Personality; // Personality function.
- std::vector<int> TypeIds; // List of type ids (filters negative)
+ MachineBasicBlock *LandingPadBlock; // Landing pad block.
+ SmallVector<MCSymbol *, 1> BeginLabels; // Labels prior to invoke.
+ SmallVector<MCSymbol *, 1> EndLabels; // Labels after invoke.
+ SmallVector<MCSymbol *, 1> ClauseLabels; // Labels for each clause.
+ MCSymbol *LandingPadLabel; // Label at beginning of landing pad.
+ const Function *Personality; // Personality function.
+ std::vector<int> TypeIds; // List of type ids (filters negative).
+ int WinEHState; // WinEH specific state number.
explicit LandingPadInfo(MachineBasicBlock *MBB)
- : LandingPadBlock(MBB), LandingPadLabel(nullptr), Personality(nullptr) {}
+ : LandingPadBlock(MBB), LandingPadLabel(nullptr), Personality(nullptr),
+ WinEHState(-1) {}
};
//===----------------------------------------------------------------------===//
@@ -88,7 +91,10 @@ public:
virtual ~MachineModuleInfoImpl();
typedef std::vector<std::pair<MCSymbol*, StubValueTy> > SymbolListTy;
protected:
- static SymbolListTy GetSortedStubs(const DenseMap<MCSymbol*, StubValueTy>&);
+
+ /// Return the entries from a DenseMap in a deterministic sorted order.
+ /// Clears the map.
+ static SymbolListTy getSortedStubs(DenseMap<MCSymbol*, StubValueTy>&);
};
//===----------------------------------------------------------------------===//
@@ -172,16 +178,19 @@ class MachineModuleInfo : public ImmutablePass {
EHPersonality PersonalityTypeCache;
+ DenseMap<const Function *, std::unique_ptr<WinEHFuncInfo>> FuncInfoMap;
+
public:
static char ID; // Pass identification, replacement for typeid
struct VariableDbgInfo {
- TrackingMDNodeRef Var;
- TrackingMDNodeRef Expr;
+ const MDLocalVariable *Var;
+ const MDExpression *Expr;
unsigned Slot;
- DebugLoc Loc;
+ const MDLocation *Loc;
- VariableDbgInfo(MDNode *Var, MDNode *Expr, unsigned Slot, DebugLoc Loc)
+ VariableDbgInfo(const MDLocalVariable *Var, const MDExpression *Expr,
+ unsigned Slot, const MDLocation *Loc)
: Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {}
};
typedef SmallVector<VariableDbgInfo, 4> VariableDbgInfoMapTy;
@@ -191,7 +200,7 @@ public:
// Real constructor.
MachineModuleInfo(const MCAsmInfo &MAI, const MCRegisterInfo &MRI,
const MCObjectFileInfo *MOFI);
- ~MachineModuleInfo();
+ ~MachineModuleInfo() override;
// Initialization and Finalization
bool doInitialization(Module &) override;
@@ -207,6 +216,12 @@ public:
void setModule(const Module *M) { TheModule = M; }
const Module *getModule() const { return TheModule; }
+ const Function *getWinEHParent(const Function *F) const;
+ WinEHFuncInfo &getWinEHFuncInfo(const Function *F);
+ bool hasWinEHFuncInfo(const Function *F) const {
+ return FuncInfoMap.count(getWinEHParent(F)) > 0;
+ }
+
/// getInfo - Keep track of various per-function pieces of information for
/// backends that would like to do so.
///
@@ -304,6 +319,8 @@ public:
void addPersonality(MachineBasicBlock *LandingPad,
const Function *Personality);
+ void addWinEHState(MachineBasicBlock *LandingPad, int State);
+
/// getPersonalityIndex - Get index of the current personality function inside
/// Personalities array
unsigned getPersonalityIndex() const;
@@ -421,8 +438,8 @@ public:
/// setVariableDbgInfo - Collect information used to emit debugging
/// information of a variable.
- void setVariableDbgInfo(MDNode *Var, MDNode *Expr, unsigned Slot,
- DebugLoc Loc) {
+ void setVariableDbgInfo(const MDLocalVariable *Var, const MDExpression *Expr,
+ unsigned Slot, const MDLocation *Loc) {
VariableDbgInfos.emplace_back(Var, Expr, Slot, Loc);
}
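The WinEH hooks added above are keyed by the EH parent function. A small usage sketch, with MMI and F assumed to be the current MachineModuleInfo and the Function being compiled:

  if (MMI.hasWinEHFuncInfo(&F)) {
    WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(&F);
    (void)EHInfo;   // e.g. consult the per-function WinEH state numbers
  }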
diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h
index 7afc7eb..a67f9b5 100644
--- a/include/llvm/CodeGen/MachineModuleInfoImpls.h
+++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h
@@ -58,14 +58,14 @@ namespace llvm {
}
/// Accessor methods to return the set of stubs in sorted order.
- SymbolListTy GetFnStubList() const {
- return GetSortedStubs(FnStubs);
+ SymbolListTy GetFnStubList() {
+ return getSortedStubs(FnStubs);
}
- SymbolListTy GetGVStubList() const {
- return GetSortedStubs(GVStubs);
+ SymbolListTy GetGVStubList() {
+ return getSortedStubs(GVStubs);
}
- SymbolListTy GetHiddenGVStubList() const {
- return GetSortedStubs(HiddenGVStubs);
+ SymbolListTy GetHiddenGVStubList() {
+ return getSortedStubs(HiddenGVStubs);
}
};
@@ -87,8 +87,8 @@ namespace llvm {
/// Accessor methods to return the set of stubs in sorted order.
- SymbolListTy GetGVStubList() const {
- return GetSortedStubs(GVStubs);
+ SymbolListTy GetGVStubList() {
+ return getSortedStubs(GVStubs);
}
};
diff --git a/include/llvm/CodeGen/MachinePassRegistry.h b/include/llvm/CodeGen/MachinePassRegistry.h
index 57d1a6d..6731983 100644
--- a/include/llvm/CodeGen/MachinePassRegistry.h
+++ b/include/llvm/CodeGen/MachinePassRegistry.h
@@ -124,7 +124,7 @@ class RegisterPassParser : public MachinePassRegistryListener,
public:
RegisterPassParser(cl::Option &O)
: cl::parser<typename RegistryClass::FunctionPassCtor>(O) {}
- ~RegisterPassParser() { RegistryClass::setListener(nullptr); }
+ ~RegisterPassParser() override { RegistryClass::setListener(nullptr); }
void initialize() {
cl::parser<typename RegistryClass::FunctionPassCtor>::initialize();
diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h
index aab5c40..70bdb19 100644
--- a/include/llvm/CodeGen/MachinePostDominators.h
+++ b/include/llvm/CodeGen/MachinePostDominators.h
@@ -33,7 +33,7 @@ public:
MachinePostDominatorTree();
- ~MachinePostDominatorTree();
+ ~MachinePostDominatorTree() override;
FunctionPass *createMachinePostDominatorTreePass();
diff --git a/include/llvm/CodeGen/MachineRegionInfo.h b/include/llvm/CodeGen/MachineRegionInfo.h
index 43499db..cf49c29 100644
--- a/include/llvm/CodeGen/MachineRegionInfo.h
+++ b/include/llvm/CodeGen/MachineRegionInfo.h
@@ -57,8 +57,6 @@ public:
}
- ~MachineRegionNode() { }
-
bool operator==(const MachineRegion &RN) const {
return this == reinterpret_cast<const MachineRegionNode*>(&RN);
}
@@ -80,7 +78,7 @@ class MachineRegionInfo : public RegionInfoBase<RegionTraits<MachineFunction>> {
public:
explicit MachineRegionInfo();
- virtual ~MachineRegionInfo();
+ ~MachineRegionInfo() override;
// updateStatistics - Update statistic about created regions.
void updateStatistics(MachineRegion *R) final;
@@ -98,7 +96,7 @@ public:
static char ID;
explicit MachineRegionInfoPass();
- ~MachineRegionInfoPass();
+ ~MachineRegionInfoPass() override;
MachineRegionInfo &getRegionInfo() {
return RI;
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 001d09f..e5b837a 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -620,22 +620,25 @@ public:
/// setRegAllocationHint - Specify a register allocation hint for the
/// specified virtual register.
- void setRegAllocationHint(unsigned Reg, unsigned Type, unsigned PrefReg) {
- RegAllocHints[Reg].first = Type;
- RegAllocHints[Reg].second = PrefReg;
+ void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VReg));
+ RegAllocHints[VReg].first = Type;
+ RegAllocHints[VReg].second = PrefReg;
}
/// getRegAllocationHint - Return the register allocation hint for the
/// specified virtual register.
std::pair<unsigned, unsigned>
- getRegAllocationHint(unsigned Reg) const {
- return RegAllocHints[Reg];
+ getRegAllocationHint(unsigned VReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(VReg));
+ return RegAllocHints[VReg];
}
/// getSimpleHint - Return the preferred register allocation hint, or 0 if a
/// standard simple hint (Type == 0) is not set.
- unsigned getSimpleHint(unsigned Reg) const {
- std::pair<unsigned, unsigned> Hint = getRegAllocationHint(Reg);
+ unsigned getSimpleHint(unsigned VReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(VReg));
+ std::pair<unsigned, unsigned> Hint = getRegAllocationHint(VReg);
return Hint.first ? 0 : Hint.second;
}
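Since the hint accessors above now assert on virtual registers, callers are expected to filter first. Illustrative only; MRI, Reg and PreferredPhysReg come from the surrounding allocation code:

  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    MRI.setRegAllocationHint(Reg, /*Type=*/0, PreferredPhysReg);
    unsigned Hint = MRI.getSimpleHint(Reg);   // 0 unless a simple hint is set
    (void)Hint;
  }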
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index a319401..e80e14e 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -385,7 +385,7 @@ public:
ShouldTrackPressure(false), RPTracker(RegPressure),
TopRPTracker(TopPressure), BotRPTracker(BotPressure) {}
- virtual ~ScheduleDAGMILive();
+ ~ScheduleDAGMILive() override;
/// Return true if this DAG supports VReg liveness and RegPressure.
bool hasVRegLiveness() const override { return true; }
@@ -909,7 +909,7 @@ public:
PostGenericScheduler(const MachineSchedContext *C):
GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
- virtual ~PostGenericScheduler() {}
+ ~PostGenericScheduler() override {}
void initPolicy(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 48e1f21..2505c04 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -125,7 +125,7 @@ public:
// Dummy constructor.
TargetPassConfig();
- virtual ~TargetPassConfig();
+ ~TargetPassConfig() override;
static char ID;
diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h
index cc9e000..fcb6fee 100644
--- a/include/llvm/CodeGen/RegisterPressure.h
+++ b/include/llvm/CodeGen/RegisterPressure.h
@@ -35,21 +35,6 @@ struct RegisterPressure {
SmallVector<unsigned,8> LiveInRegs;
SmallVector<unsigned,8> LiveOutRegs;
- /// Increase register pressure for each pressure set impacted by this register
- /// class. Normally called by RegPressureTracker, but may be called manually
- /// to account for live through (global liveness).
- ///
- /// \param Reg is either a virtual register number or register unit number.
- void increase(unsigned Reg, const TargetRegisterInfo *TRI,
- const MachineRegisterInfo *MRI);
-
- /// Decrease register pressure for each pressure set impacted by this register
- /// class. This is only useful to account for spilling or rematerialization.
- ///
- /// \param Reg is either a virtual register number or register unit number.
- void decrease(unsigned Reg, const TargetRegisterInfo *TRI,
- const MachineRegisterInfo *MRI);
-
void dump(const TargetRegisterInfo *TRI) const;
};
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 00dd8f9..1196783 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -158,7 +158,7 @@ namespace llvm {
bool RemoveKillFlags = false,
LiveIntervals *LIS = nullptr);
- virtual ~ScheduleDAGInstrs() {}
+ ~ScheduleDAGInstrs() override {}
bool isPostRA() const { return IsPostRA; }
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index dc1c80d..582febd 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -41,14 +41,16 @@ class TargetSelectionDAGInfo;
class SDVTListNode : public FoldingSetNode {
friend struct FoldingSetTrait<SDVTListNode>;
- /// FastID - A reference to an Interned FoldingSetNodeID for this node.
+ /// A reference to an Interned FoldingSetNodeID for this node.
/// The Allocator in SelectionDAG holds the data.
/// SDVTList contains all types which are frequently accessed in SelectionDAG.
- /// The size of this list is not expected big so it won't introduce memory penalty.
+ /// The size of this list is not expected to be big so it won't introduce
+ /// a memory penalty.
FoldingSetNodeIDRef FastID;
const EVT *VTs;
unsigned int NumVTs;
- /// The hash value for SDVTList is fixed so cache it to avoid hash calculation
+ /// The hash value for SDVTList is fixed, so cache it to avoid
+ /// hash calculation.
unsigned HashValue;
public:
SDVTListNode(const FoldingSetNodeIDRef ID, const EVT *VT, unsigned int Num) :
@@ -61,8 +63,8 @@ public:
}
};
-// Specialize FoldingSetTrait for SDVTListNode
-// To avoid computing temp FoldingSetNodeID and hash value.
+/// Specialize FoldingSetTrait for SDVTListNode
+/// to avoid computing temp FoldingSetNodeID and hash value.
template<> struct FoldingSetTrait<SDVTListNode> : DefaultFoldingSetTrait<SDVTListNode> {
static void Profile(const SDVTListNode &X, FoldingSetNodeID& ID) {
ID = X.FastID;
@@ -98,7 +100,7 @@ private:
static void createNode(const SDNode &);
};
-/// SDDbgInfo - Keeps track of dbg_value information through SDISel. We do
+/// Keeps track of dbg_value information through SDISel. We do
/// not build SDNodes for these so as not to perturb the generated code;
/// instead the info is kept off to the side in this structure. Each SDNode may
/// have one or more associated dbg_value entries. This information is kept in
@@ -159,10 +161,10 @@ public:
class SelectionDAG;
void checkForCycles(const SelectionDAG *DAG, bool force = false);
-/// SelectionDAG class - This is used to represent a portion of an LLVM function
-/// in a low-level Data Dependence DAG representation suitable for instruction
-/// selection. This DAG is constructed as the first step of instruction
-/// selection in order to allow implementation of machine specific optimizations
+/// This is used to represent a portion of an LLVM function in a low-level
+/// Data Dependence DAG representation suitable for instruction selection.
+/// This DAG is constructed as the first step of instruction selection in order
+/// to allow implementation of machine specific optimizations
/// and code simplifications.
///
/// The representation used by the SelectionDAG is a target-independent
@@ -178,40 +180,39 @@ class SelectionDAG {
LLVMContext *Context;
CodeGenOpt::Level OptLevel;
- /// EntryNode - The starting token.
+ /// The starting token.
SDNode EntryNode;
- /// Root - The root of the entire DAG.
+ /// The root of the entire DAG.
SDValue Root;
- /// AllNodes - A linked list of nodes in the current DAG.
+ /// A linked list of nodes in the current DAG.
ilist<SDNode> AllNodes;
- /// NodeAllocatorType - The AllocatorType for allocating SDNodes. We use
+ /// The AllocatorType for allocating SDNodes. We use
/// pool allocation with recycling.
typedef RecyclingAllocator<BumpPtrAllocator, SDNode, sizeof(LargestSDNode),
AlignOf<MostAlignedSDNode>::Alignment>
NodeAllocatorType;
- /// NodeAllocator - Pool allocation for nodes.
+ /// Pool allocation for nodes.
NodeAllocatorType NodeAllocator;
- /// CSEMap - This structure is used to memoize nodes, automatically performing
+ /// This structure is used to memoize nodes, automatically performing
/// CSE with existing nodes when a duplicate is requested.
FoldingSet<SDNode> CSEMap;
- /// OperandAllocator - Pool allocation for machine-opcode SDNode operands.
+ /// Pool allocation for machine-opcode SDNode operands.
BumpPtrAllocator OperandAllocator;
- /// Allocator - Pool allocation for misc. objects that are created once per
- /// SelectionDAG.
+ /// Pool allocation for misc. objects that are created once per SelectionDAG.
BumpPtrAllocator Allocator;
- /// DbgInfo - Tracks dbg_value information through SDISel.
+ /// Tracks dbg_value information through SDISel.
SDDbgInfo *DbgInfo;
public:
- /// DAGUpdateListener - Clients of various APIs that cause global effects on
+ /// Clients of various APIs that cause global effects on
/// the DAG can optionally implement this interface. This allows the clients
/// to handle the various sorts of updates that happen.
///
@@ -232,15 +233,15 @@ public:
DAG.UpdateListeners = Next;
}
- /// NodeDeleted - The node N that was deleted and, if E is not null, an
+ /// The node N that was deleted and, if E is not null, an
/// equivalent node E that replaced it.
virtual void NodeDeleted(SDNode *N, SDNode *E);
- /// NodeUpdated - The node N that was updated.
+ /// The node N that was updated.
virtual void NodeUpdated(SDNode *N);
};
- /// NewNodesMustHaveLegalTypes - When true, additional steps are taken to
+ /// When true, additional steps are taken to
/// ensure that getConstant() and similar functions return DAG nodes that
/// have legal types. This is important after type legalization since
/// any illegally typed nodes generated after this point will not experience
@@ -251,13 +252,12 @@ private:
/// DAGUpdateListener is a friend so it can manipulate the listener stack.
friend struct DAGUpdateListener;
- /// UpdateListeners - Linked list of registered DAGUpdateListener instances.
+ /// Linked list of registered DAGUpdateListener instances.
/// This stack is maintained by DAGUpdateListener RAII.
DAGUpdateListener *UpdateListeners;
- /// setGraphColorHelper - Implementation of setSubgraphColor.
+ /// Implementation of setSubgraphColor.
/// Return whether we had to truncate the search.
- ///
bool setSubgraphColorHelper(SDNode *N, const char *Color,
DenseSet<SDNode *> &visited,
int level, bool &printed);
@@ -269,14 +269,11 @@ public:
explicit SelectionDAG(const TargetMachine &TM, llvm::CodeGenOpt::Level);
~SelectionDAG();
- /// init - Prepare this SelectionDAG to process code in the given
- /// MachineFunction.
- ///
+ /// Prepare this SelectionDAG to process code in the given MachineFunction.
void init(MachineFunction &mf);
- /// clear - Clear state and free memory necessary to make this
+ /// Clear state and free memory necessary to make this
/// SelectionDAG ready to process a new block.
- ///
void clear();
MachineFunction &getMachineFunction() const { return *MF; }
@@ -286,8 +283,7 @@ public:
const TargetSelectionDAGInfo &getSelectionDAGInfo() const { return *TSI; }
LLVMContext *getContext() const {return Context; }
- /// viewGraph - Pop up a GraphViz/gv window with the DAG rendered using 'dot'.
- ///
+ /// Pop up a GraphViz/gv window with the DAG rendered using 'dot'.
void viewGraph(const std::string &Title);
void viewGraph();
@@ -295,24 +291,21 @@ public:
std::map<const SDNode *, std::string> NodeGraphAttrs;
#endif
- /// clearGraphAttrs - Clear all previously defined node graph attributes.
+ /// Clear all previously defined node graph attributes.
/// Intended to be used from a debugging tool (eg. gdb).
void clearGraphAttrs();
- /// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
- ///
+ /// Set graph attributes for a node. (eg. "color=red".)
void setGraphAttrs(const SDNode *N, const char *Attrs);
- /// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
+ /// Get graph attributes for a node. (eg. "color=red".)
/// Used from getNodeAttributes.
const std::string getGraphAttrs(const SDNode *N) const;
- /// setGraphColor - Convenience for setting node color attribute.
- ///
+ /// Convenience for setting node color attribute.
void setGraphColor(const SDNode *N, const char *Color);
- /// setGraphColor - Convenience for setting subgraph color attribute.
- ///
+ /// Convenience for setting subgraph color attribute.
void setSubgraphColor(SDNode *N, const char *Color);
typedef ilist<SDNode>::const_iterator allnodes_const_iterator;
@@ -325,17 +318,15 @@ public:
return AllNodes.size();
}
- /// getRoot - Return the root tag of the SelectionDAG.
- ///
+ /// Return the root tag of the SelectionDAG.
const SDValue &getRoot() const { return Root; }
- /// getEntryNode - Return the token chain corresponding to the entry of the
- /// function.
+ /// Return the token chain corresponding to the entry of the function.
SDValue getEntryNode() const {
return SDValue(const_cast<SDNode *>(&EntryNode), 0);
}
- /// setRoot - Set the current root tag of the SelectionDAG.
+ /// Set the current root tag of the SelectionDAG.
///
const SDValue &setRoot(SDValue N) {
assert((!N.getNode() || N.getValueType() == MVT::Other) &&
@@ -348,22 +339,22 @@ public:
return Root;
}
- /// Combine - This iterates over the nodes in the SelectionDAG, folding
+ /// This iterates over the nodes in the SelectionDAG, folding
/// certain types of nodes together, or eliminating superfluous nodes. The
/// Level argument controls whether Combine is allowed to produce nodes and
/// types that are illegal on the target.
void Combine(CombineLevel Level, AliasAnalysis &AA,
CodeGenOpt::Level OptLevel);
- /// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
- /// only uses types natively supported by the target. Returns "true" if it
- /// made any changes.
+ /// This transforms the SelectionDAG into a SelectionDAG that
+ /// only uses types natively supported by the target.
+ /// Returns "true" if it made any changes.
///
/// Note that this is an involved process that may invalidate pointers into
/// the graph.
bool LegalizeTypes();
- /// Legalize - This transforms the SelectionDAG into a SelectionDAG that is
+ /// This transforms the SelectionDAG into a SelectionDAG that is
/// compatible with the target instruction selector, as indicated by the
/// TargetLowering object.
///
@@ -392,7 +383,7 @@ public:
/// UpdatedNodes with any new nodes replacing those originally in the DAG.
bool LegalizeOp(SDNode *N, SmallSetVector<SDNode *, 16> &UpdatedNodes);
- /// LegalizeVectors - This transforms the SelectionDAG into a SelectionDAG
+ /// This transforms the SelectionDAG into a SelectionDAG
/// that only uses vector math operations supported by the target. This is
/// necessary as a separate step from Legalize because unrolling a vector
/// operation can introduce illegal types, which requires running
@@ -405,16 +396,14 @@ public:
/// the graph.
bool LegalizeVectors();
- /// RemoveDeadNodes - This method deletes all unreachable nodes in the
- /// SelectionDAG.
+ /// This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNodes();
- /// DeleteNode - Remove the specified node from the system. This node must
+ /// Remove the specified node from the system. This node must
/// have no referrers.
void DeleteNode(SDNode *N);
- /// getVTList - Return an SDVTList that represents the list of values
- /// specified.
+ /// Return an SDVTList that represents the list of values specified.
SDVTList getVTList(EVT VT);
SDVTList getVTList(EVT VT1, EVT VT2);
SDVTList getVTList(EVT VT1, EVT VT2, EVT VT3);
@@ -561,10 +550,9 @@ public:
SDValue STy,
SDValue Rnd, SDValue Sat, ISD::CvtCode Code);
- /// getVectorShuffle - Return an ISD::VECTOR_SHUFFLE node. The number of
- /// elements in VT, which must be a vector type, must match the number of
- /// mask elements NumElts. A integer mask element equal to -1 is treated as
- /// undefined.
+ /// Return an ISD::VECTOR_SHUFFLE node. The number of elements in VT,
+ /// which must be a vector type, must match the number of mask elements
+ /// NumElts. An integer mask element equal to -1 is treated as undefined.
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2,
const int *MaskElts);
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2,
@@ -580,63 +568,62 @@ public:
/// Example: shuffle A, B, <0,5,2,7> -> shuffle B, A, <4,1,6,3>
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV);
- /// getAnyExtOrTrunc - Convert Op, which must be of integer type, to the
+ /// Convert Op, which must be of integer type, to the
/// integer type VT, by either any-extending or truncating it.
SDValue getAnyExtOrTrunc(SDValue Op, SDLoc DL, EVT VT);
- /// getSExtOrTrunc - Convert Op, which must be of integer type, to the
+ /// Convert Op, which must be of integer type, to the
/// integer type VT, by either sign-extending or truncating it.
SDValue getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT);
- /// getZExtOrTrunc - Convert Op, which must be of integer type, to the
+ /// Convert Op, which must be of integer type, to the
/// integer type VT, by either zero-extending or truncating it.
SDValue getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT);
- /// getZeroExtendInReg - Return the expression required to zero extend the Op
+ /// Return the expression required to zero extend the Op
/// value assuming it was the smaller SrcTy value.
SDValue getZeroExtendInReg(SDValue Op, SDLoc DL, EVT SrcTy);
- /// getAnyExtendVectorInReg - Return an operation which will any-extend the
- /// low lanes of the operand into the specified vector type. For example,
+ /// Return an operation which will any-extend the low lanes of the operand
+ /// into the specified vector type. For example,
/// this can convert a v16i8 into a v4i32 by any-extending the low four
/// lanes of the operand from i8 to i32.
SDValue getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT);
- /// getSignExtendVectorInReg - Return an operation which will sign extend the
- /// low lanes of the operand into the specified vector type. For example,
+ /// Return an operation which will sign extend the low lanes of the operand
+ /// into the specified vector type. For example,
/// this can convert a v16i8 into a v4i32 by sign extending the low four
/// lanes of the operand from i8 to i32.
SDValue getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT);
- /// getZeroExtendVectorInReg - Return an operation which will zero extend the
- /// low lanes of the operand into the specified vector type. For example,
+ /// Return an operation which will zero extend the low lanes of the operand
+ /// into the specified vector type. For example,
/// this can convert a v16i8 into a v4i32 by zero extending the low four
/// lanes of the operand from i8 to i32.
SDValue getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT);
- /// getBoolExtOrTrunc - Convert Op, which must be of integer type, to the
- /// integer type VT, by using an extension appropriate for the target's
+ /// Convert Op, which must be of integer type, to the integer type VT,
+ /// by using an extension appropriate for the target's
/// BooleanContent for type OpVT or truncating it.
SDValue getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, EVT OpVT);
- /// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+ /// Create a bitwise NOT operation as (XOR Val, -1).
SDValue getNOT(SDLoc DL, SDValue Val, EVT VT);
/// \brief Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getLogicalNOT(SDLoc DL, SDValue Val, EVT VT);
- /// getCALLSEQ_START - Return a new CALLSEQ_START node, which always must have
- /// a glue result (to ensure it's not CSE'd). CALLSEQ_START does not have a
- /// useful SDLoc.
+ /// Return a new CALLSEQ_START node, which always must have a glue result
+ /// (to ensure it's not CSE'd). CALLSEQ_START does not have a useful SDLoc.
SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, SDLoc DL) {
SDVTList VTs = getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Op };
return getNode(ISD::CALLSEQ_START, DL, VTs, Ops);
}
- /// getCALLSEQ_END - Return a new CALLSEQ_END node, which always must have a
- /// glue result (to ensure it's not CSE'd). CALLSEQ_END does not have
- /// a useful SDLoc.
+ /// Return a new CALLSEQ_END node, which always must have a
+ /// glue result (to ensure it's not CSE'd).
+ /// CALLSEQ_END does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2,
SDValue InGlue, SDLoc DL) {
SDVTList NodeTys = getVTList(MVT::Other, MVT::Glue);
@@ -649,18 +636,17 @@ public:
return getNode(ISD::CALLSEQ_END, DL, NodeTys, Ops);
}
- /// getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
+ /// Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getUNDEF(EVT VT) {
return getNode(ISD::UNDEF, SDLoc(), VT);
}
- /// getGLOBAL_OFFSET_TABLE - Return a GLOBAL_OFFSET_TABLE node. This does
- /// not have a useful SDLoc.
+ /// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
return getNode(ISD::GLOBAL_OFFSET_TABLE, SDLoc(), VT);
}
- /// getNode - Gets or creates the specified node.
+ /// Gets or creates the specified node.
///
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT);
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N);
@@ -692,27 +678,26 @@ public:
SDValue N1, SDValue N2, SDValue N3, SDValue N4,
SDValue N5);
- /// getStackArgumentTokenFactor - Compute a TokenFactor to force all
- /// the incoming stack arguments to be loaded from the stack. This is
- /// used in tail call lowering to protect stack arguments from being
- /// clobbered.
+ /// Compute a TokenFactor to force all the incoming stack arguments to be
+ /// loaded from the stack. This is used in tail call lowering to protect
+ /// stack arguments from being clobbered.
SDValue getStackArgumentTokenFactor(SDValue Chain);
SDValue getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align, bool isVol, bool AlwaysInline,
- MachinePointerInfo DstPtrInfo,
+ bool isTailCall, MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo);
SDValue getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVol,
+ SDValue Size, unsigned Align, bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo);
SDValue getMemset(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVol,
+ SDValue Size, unsigned Align, bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo);
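The memcpy/memmove/memset builders above gain an isTailCall flag. A sketch of an updated lowering call site, with DAG, Chain, dl, Dst, Src, Size, DstPtrInfo and SrcPtrInfo assumed from the enclosing lowering code:

  SDValue Copy =
      DAG.getMemcpy(Chain, dl, Dst, Src, Size, /*Align=*/1, /*isVol=*/false,
                    /*AlwaysInline=*/false, /*isTailCall=*/false,
                    DstPtrInfo, SrcPtrInfo);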
- /// getSetCC - Helper function to make it easier to build SetCC's if you just
+ /// Helper function to make it easier to build SetCC's if you just
/// have an ISD::CondCode instead of an SDValue.
///
SDValue getSetCC(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
@@ -726,8 +711,8 @@ public:
return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond));
}
- // getSelect - Helper function to make it easier to build Select's if you just
- // have operands and don't want to check for vector.
+ /// Helper function to make it easier to build Select's if you just
+ /// have operands and don't want to check for vector.
SDValue getSelect(SDLoc DL, EVT VT, SDValue Cond,
SDValue LHS, SDValue RHS) {
assert(LHS.getValueType() == RHS.getValueType() &&
@@ -738,7 +723,7 @@ public:
Cond, LHS, RHS);
}
- /// getSelectCC - Helper function to make it easier to build SelectCC's if you
+ /// Helper function to make it easier to build SelectCC's if you
/// just have an ISD::CondCode instead of an SDValue.
///
SDValue getSelectCC(SDLoc DL, SDValue LHS, SDValue RHS,
@@ -747,12 +732,12 @@ public:
LHS, RHS, True, False, getCondCode(Cond));
}
- /// getVAArg - VAArg produces a result and token chain, and takes a pointer
+ /// VAArg produces a result and token chain, and takes a pointer
/// and a source value as input.
SDValue getVAArg(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
SDValue SV, unsigned Align);
- /// getAtomicCmpSwap - Gets a node for an atomic cmpxchg op. There are two
+ /// Gets a node for an atomic cmpxchg op. There are two
/// valid Opcodes. ISD::ATOMIC_CMP_SWAP produces the value loaded and a
/// chain result. ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS produces the value loaded,
/// a success flag (initially i1), and a chain.
@@ -769,7 +754,7 @@ public:
AtomicOrdering FailureOrdering,
SynchronizationScope SynchScope);
- /// getAtomic - Gets a node for an atomic op, produces result (if relevant)
+ /// Gets a node for an atomic op, produces result (if relevant)
/// and chain and takes 2 operands.
SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain,
SDValue Ptr, SDValue Val, const Value *PtrVal,
@@ -780,15 +765,15 @@ public:
AtomicOrdering Ordering,
SynchronizationScope SynchScope);
- /// getAtomic - Gets a node for an atomic op, produces result and chain and
+ /// Gets a node for an atomic op, produces result and chain and
/// takes 1 operand.
SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, EVT VT,
SDValue Chain, SDValue Ptr, MachineMemOperand *MMO,
AtomicOrdering Ordering,
SynchronizationScope SynchScope);
- /// getAtomic - Gets a node for an atomic op, produces result and chain and
- /// takes N operands.
+ /// Gets a node for an atomic op, produces result and chain and takes N
+ /// operands.
SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTList,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
AtomicOrdering SuccessOrdering,
@@ -798,7 +783,7 @@ public:
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
AtomicOrdering Ordering, SynchronizationScope SynchScope);
- /// getMemIntrinsicNode - Creates a MemIntrinsicNode that may produce a
+ /// Creates a MemIntrinsicNode that may produce a
/// result and takes a list of operands. Opcode may be INTRINSIC_VOID,
/// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not
/// less than FIRST_TARGET_MEMORY_OPCODE.
@@ -813,10 +798,10 @@ public:
ArrayRef<SDValue> Ops,
EVT MemVT, MachineMemOperand *MMO);
- /// getMergeValues - Create a MERGE_VALUES node from the given operands.
+ /// Create a MERGE_VALUES node from the given operands.
SDValue getMergeValues(ArrayRef<SDValue> Ops, SDLoc dl);
- /// getLoad - Loads are not normal binary operators: their result type is not
+ /// Loads are not normal binary operators: their result type is not
/// determined by their operands, and they produce a value AND a token chain.
///
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
@@ -848,8 +833,7 @@ public:
SDValue Chain, SDValue Ptr, SDValue Offset,
EVT MemVT, MachineMemOperand *MMO);
- /// getStore - Helper function to build ISD::STORE nodes.
- ///
+ /// Helper function to build ISD::STORE nodes.
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr,
MachinePointerInfo PtrInfo, bool isVolatile,
bool isNonTemporal, unsigned Alignment,
@@ -872,21 +856,21 @@ public:
SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
SDValue Ptr, SDValue Mask, EVT MemVT,
MachineMemOperand *MMO, bool IsTrunc);
- /// getSrcValue - Construct a node to track a Value* through the backend.
+ /// Construct a node to track a Value* through the backend.
SDValue getSrcValue(const Value *v);
- /// getMDNode - Return an MDNodeSDNode which holds an MDNode.
+ /// Return an MDNodeSDNode which holds an MDNode.
SDValue getMDNode(const MDNode *MD);
- /// getAddrSpaceCast - Return an AddrSpaceCastSDNode.
+ /// Return an AddrSpaceCastSDNode.
SDValue getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
unsigned SrcAS, unsigned DestAS);
- /// getShiftAmountOperand - Return the specified value casted to
+ /// Return the specified value casted to
/// the target's desired shift amount type.
SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op);
- /// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+ /// *Mutate* the specified node in-place to have the
/// specified operands. If the resultant node already exists in the DAG,
/// this does not modify the specified node, instead it returns the node that
/// already exists. If the resultant node does not exist in the DAG, the
@@ -902,7 +886,7 @@ public:
SDValue Op3, SDValue Op4, SDValue Op5);
SDNode *UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops);
- /// SelectNodeTo - These are used for target selectors to *mutate* the
+ /// These are used for target selectors to *mutate* the
/// specified node to have the specified return type, Target opcode, and
/// operands. Note that target opcodes are stored as
/// ~TargetOpcode in the node opcode field. The resultant node is returned.
@@ -932,12 +916,12 @@ public:
SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, SDVTList VTs,
ArrayRef<SDValue> Ops);
- /// MorphNodeTo - This *mutates* the specified node to have the specified
+ /// This *mutates* the specified node to have the specified
/// return type, opcode, and operands.
SDNode *MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs,
ArrayRef<SDValue> Ops);
- /// getMachineNode - These are used for target selectors to create a new node
+ /// These are used for target selectors to create a new node
/// with specified return type(s), MachineInstr opcode, and operands.
///
/// Note that getMachineNode returns the resultant node. If there is already
@@ -976,25 +960,20 @@ public:
MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, SDVTList VTs,
ArrayRef<SDValue> Ops);
- /// getTargetExtractSubreg - A convenience function for creating
- /// TargetInstrInfo::EXTRACT_SUBREG nodes.
+ /// A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT,
SDValue Operand);
- /// getTargetInsertSubreg - A convenience function for creating
- /// TargetInstrInfo::INSERT_SUBREG nodes.
+ /// A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT,
SDValue Operand, SDValue Subreg);
- /// getNodeIfExists - Get the specified node if it's already available, or
- /// else return NULL.
+ /// Get the specified node if it's already available, or else return NULL.
SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTs, ArrayRef<SDValue> Ops,
bool nuw = false, bool nsw = false,
bool exact = false);
- /// getDbgValue - Creates a SDDbgValue node.
- ///
- /// SDNode
+ /// Creates a SDDbgValue node.
SDDbgValue *getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R,
bool IsIndirect, uint64_t Off, DebugLoc DL,
unsigned O);
@@ -1007,16 +986,16 @@ public:
SDDbgValue *getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, unsigned FI,
uint64_t Off, DebugLoc DL, unsigned O);
- /// RemoveDeadNode - Remove the specified node from the system. If any of its
+ /// Remove the specified node from the system. If any of its
/// operands then becomes dead, remove them as well. Inform UpdateListener
/// for each node deleted.
void RemoveDeadNode(SDNode *N);
- /// RemoveDeadNodes - This method deletes the unreachable nodes in the
+ /// This method deletes the unreachable nodes in the
/// given list, and any nodes that become unreachable as a result.
void RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes);
- /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+ /// Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG. Use the first
/// version if 'From' is known to have a single result, use the second
/// if you have two nodes with identical results (or if 'To' has a superset
@@ -1035,30 +1014,29 @@ public:
void ReplaceAllUsesWith(SDNode *From, SDNode *To);
void ReplaceAllUsesWith(SDNode *From, const SDValue *To);
- /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+ /// Replace any uses of From with To, leaving
/// uses of other values produced by From.Val alone.
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To);
- /// ReplaceAllUsesOfValuesWith - Like ReplaceAllUsesOfValueWith, but
- /// for multiple values at once. This correctly handles the case where
+ /// Like ReplaceAllUsesOfValueWith, but for multiple values at once.
+ /// This correctly handles the case where
/// there is an overlap between the From values and the To values.
void ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To,
unsigned Num);
- /// AssignTopologicalOrder - Topological-sort the AllNodes list and a
+ /// Topological-sort the AllNodes list and
/// assign a unique node id for each node in the DAG based on their
/// topological order. Returns the number of nodes.
unsigned AssignTopologicalOrder();
- /// RepositionNode - Move node N in the AllNodes list to be immediately
+ /// Move node N in the AllNodes list to be immediately
/// before the given iterator Position. This may be used to update the
/// topological ordering when the list of nodes is modified.
void RepositionNode(allnodes_iterator Position, SDNode *N) {
AllNodes.insert(Position, AllNodes.remove(N));
}
- /// isCommutativeBinOp - Returns true if the opcode is a commutative binary
- /// operation.
+ /// Returns true if the opcode is a commutative binary operation.
static bool isCommutativeBinOp(unsigned Opcode) {
// FIXME: This should get its info from the td file, so that we can include
// target info.
@@ -1099,19 +1077,19 @@ public:
}
}
- /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
+ /// Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
void AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter);
- /// GetDbgValues - Get the debug values which reference the given SDNode.
+ /// Get the debug values which reference the given SDNode.
ArrayRef<SDDbgValue*> GetDbgValues(const SDNode* SD) {
return DbgInfo->getSDDbgValues(SD);
}
- /// TransferDbgValues - Transfer SDDbgValues.
+ /// Transfer SDDbgValues.
void TransferDbgValues(SDValue From, SDValue To);
- /// hasDebugValues - Return true if there are any SDDbgValue nodes associated
+ /// Return true if there are any SDDbgValue nodes associated
/// with this SelectionDAG.
bool hasDebugValues() const { return !DbgInfo->empty(); }
@@ -1126,28 +1104,27 @@ public:
void dump() const;
- /// CreateStackTemporary - Create a stack temporary, suitable for holding the
+ /// Create a stack temporary, suitable for holding the
/// specified value type. If minAlign is specified, the slot size will have
/// at least that alignment.
SDValue CreateStackTemporary(EVT VT, unsigned minAlign = 1);
- /// CreateStackTemporary - Create a stack temporary suitable for holding
+ /// Create a stack temporary suitable for holding
/// either of the specified value types.
SDValue CreateStackTemporary(EVT VT1, EVT VT2);
- /// FoldConstantArithmetic -
SDValue FoldConstantArithmetic(unsigned Opcode, EVT VT,
SDNode *Cst1, SDNode *Cst2);
- /// FoldSetCC - Constant fold a setcc to true or false.
+ /// Constant fold a setcc to true or false.
SDValue FoldSetCC(EVT VT, SDValue N1,
SDValue N2, ISD::CondCode Cond, SDLoc dl);
- /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
- /// use this predicate to simplify operations downstream.
+ /// Return true if the sign bit of Op is known to be zero.
+ /// We use this predicate to simplify operations downstream.
bool SignBitIsZero(SDValue Op, unsigned Depth = 0) const;
- /// MaskedValueIsZero - Return true if 'Op & Mask' is known to be zero. We
+ /// Return true if 'Op & Mask' is known to be zero. We
/// use this predicate to simplify operations downstream. Op and Mask are
/// known to be the same type.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth = 0)
@@ -1160,7 +1137,7 @@ public:
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne,
unsigned Depth = 0) const;
- /// ComputeNumSignBits - Return the number of times the sign bit of the
+ /// Return the number of times the sign bit of the
/// register is replicated into the other bits. We know that at least 1 bit
/// is always equal to the sign bit (itself), but other cases can give us
/// information. For example, immediately after an "SRA X, 2", we know that
@@ -1169,26 +1146,26 @@ public:
/// class to allow target nodes to be understood.
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth = 0) const;
- /// isBaseWithConstantOffset - Return true if the specified operand is an
+ /// Return true if the specified operand is an
/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
/// semantics as an ADD. This handles the equivalence:
/// X|Cst == X+Cst iff X&Cst = 0.
bool isBaseWithConstantOffset(SDValue Op) const;
- /// isKnownNeverNan - Test whether the given SDValue is known to never be NaN.
+ /// Test whether the given SDValue is known to never be NaN.
bool isKnownNeverNaN(SDValue Op) const;
- /// isKnownNeverZero - Test whether the given SDValue is known to never be
+ /// Test whether the given SDValue is known to never be
/// positive or negative Zero.
bool isKnownNeverZero(SDValue Op) const;
- /// isEqualTo - Test whether two SDValues are known to compare equal. This
+ /// Test whether two SDValues are known to compare equal. This
/// is true if they are the same value, or if one is negative zero and the
/// other positive zero.
bool isEqualTo(SDValue A, SDValue B) const;
- /// UnrollVectorOp - Utility function used by legalize and lowering to
+ /// Utility function used by legalize and lowering to
/// "unroll" a vector operation by splitting out the scalars and operating
/// on each element individually. If the ResNE is 0, fully unroll the vector
/// op. If ResNE is less than the width of the vector op, unroll up to ResNE.
@@ -1196,43 +1173,40 @@ public:
/// vector op and fill the end of the resulting vector with UNDEFS.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0);
- /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
- /// location that is 'Dist' units away from the location that the 'Base' load
- /// is loading from.
+ /// Return true if LD is loading 'Bytes' bytes from a location that is 'Dist'
+ /// units away from the location that the 'Base' load is loading from.
bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
unsigned Bytes, int Dist) const;
- /// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+ /// Infer alignment of a load / store address. Return 0 if
/// it cannot be inferred.
unsigned InferPtrAlignment(SDValue Ptr) const;
- /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+ /// Compute the VTs needed for the low/hi parts of a type
/// which is split (or expanded) into two not necessarily identical pieces.
std::pair<EVT, EVT> GetSplitDestVTs(const EVT &VT) const;
- /// SplitVector - Split the vector with EXTRACT_SUBVECTOR using the provides
+ /// Split the vector with EXTRACT_SUBVECTOR using the provided
/// VTs and return the low/high part.
std::pair<SDValue, SDValue> SplitVector(const SDValue &N, const SDLoc &DL,
const EVT &LoVT, const EVT &HiVT);
- /// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the
- /// low/high part.
+ /// Split the vector with EXTRACT_SUBVECTOR and return the low/high part.
std::pair<SDValue, SDValue> SplitVector(const SDValue &N, const SDLoc &DL) {
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = GetSplitDestVTs(N.getValueType());
return SplitVector(N, DL, LoVT, HiVT);
}
- /// SplitVectorOperand - Split the node's operand with EXTRACT_SUBVECTOR and
+ /// Split the node's operand with EXTRACT_SUBVECTOR and
/// return the low/high part.
std::pair<SDValue, SDValue> SplitVectorOperand(const SDNode *N, unsigned OpNo)
{
return SplitVector(N->getOperand(OpNo), SDLoc(N));
}
- /// ExtractVectorElements - Append the extracted elements from Start to Count
- /// out of the vector Op in Args. If Count is 0, all of the elements will be
- /// extracted.
+ /// Append the extracted elements from Start to Count out of the vector Op
+ /// in Args. If Count is 0, all of the elements will be extracted.
void ExtractVectorElements(SDValue Op, SmallVectorImpl<SDValue> &Args,
unsigned Start = 0, unsigned Count = 0);
@@ -1258,10 +1232,10 @@ private:
SDValue N1, SDValue N2, bool nuw, bool nsw,
bool exact);
- /// VTList - List of non-single value types.
+ /// List of non-single value types.
FoldingSet<SDVTListNode> VTListMap;
- /// CondCodeNodes - Maps to auto-CSE operations.
+ /// Maps to auto-CSE operations.
std::vector<CondCodeSDNode*> CondCodeNodes;
std::vector<SDNode*> ValueTypeNodes;
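// A minimal sketch (not part of this patch) of how the SelectionDAG helpers
// documented above are typically used from a target's lowering code:
// getSetCC builds a comparison node from an ISD::CondCode, and getSelect
// picks between two values without the caller checking for vector types.
// The function name, operands, and value types below are illustrative only.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue lowerSMaxSketch(SelectionDAG &DAG, SDLoc DL, EVT VT,
                               SDValue A, SDValue B) {
  // Signed "A > B" comparison; i1 stands in for the target's boolean type.
  SDValue Cmp = DAG.getSetCC(DL, MVT::i1, A, B, ISD::SETGT);
  // Select A when the comparison is true, otherwise B.
  return DAG.getSelect(DL, VT, Cmp, A, B);
}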
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 7acdfc7..a874375 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -58,7 +58,7 @@ public:
explicit SelectionDAGISel(TargetMachine &tm,
CodeGenOpt::Level OL = CodeGenOpt::Default);
- virtual ~SelectionDAGISel();
+ ~SelectionDAGISel() override;
const TargetLowering *getTargetLowering() const { return TLI; }
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 2b3e08c..66f060c 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -50,23 +50,6 @@ template <typename T> struct DenseMapInfo;
template <typename T> struct simplify_type;
template <typename T> struct ilist_traits;
-/// Returns true if the opcode is a binary operation with flags.
-static bool isBinOpWithFlags(unsigned Opcode) {
- switch (Opcode) {
- case ISD::SDIV:
- case ISD::UDIV:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::MUL:
- case ISD::ADD:
- case ISD::SUB:
- case ISD::SHL:
- return true;
- default:
- return false;
- }
-}
-
void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
bool force = false);
@@ -967,6 +950,23 @@ public:
}
};
+/// Returns true if the opcode is a binary operation with flags.
+static bool isBinOpWithFlags(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::MUL:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::SHL:
+ return true;
+ default:
+ return false;
+ }
+}
+
/// This class is an extension of BinarySDNode
/// used for those opcodes that have associated extra flags.
class BinaryWithFlagsSDNode : public BinarySDNode {
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 75920a3..a868cbd 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -34,11 +34,12 @@ namespace llvm {
class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
bool UseInitArray;
+ mutable unsigned NextUniqueID = 0;
public:
TargetLoweringObjectFileELF() : UseInitArray(false) {}
- virtual ~TargetLoweringObjectFileELF() {}
+ ~TargetLoweringObjectFileELF() override {}
void emitPersonalityValue(MCStreamer &Streamer, const TargetMachine &TM,
const MCSymbol *Sym) const override;
@@ -87,7 +88,7 @@ public:
class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
public:
- virtual ~TargetLoweringObjectFileMachO() {}
+ ~TargetLoweringObjectFileMachO() override {}
TargetLoweringObjectFileMachO();
/// Extract the dependent library name from a linker option string. Returns
@@ -135,7 +136,7 @@ public:
class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
public:
- virtual ~TargetLoweringObjectFileCOFF() {}
+ ~TargetLoweringObjectFileCOFF() override {}
const MCSection *
getExplicitSectionGlobal(const GlobalValue *GV,
diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h
new file mode 100644
index 0000000..5fc2b12
--- /dev/null
+++ b/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -0,0 +1,153 @@
+//===-- llvm/CodeGen/WinEHFuncInfo.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures and associated state for Windows exception handling schemes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_WINEHFUNCINFO_H
+#define LLVM_CODEGEN_WINEHFUNCINFO_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+class BasicBlock;
+class Constant;
+class Function;
+class GlobalValue;
+class GlobalVariable;
+class IntrinsicInst;
+class LandingPadInst;
+class MCSymbol;
+class Value;
+
+enum ActionType { Catch, Cleanup };
+
+class ActionHandler {
+public:
+ ActionHandler(BasicBlock *BB, ActionType Type)
+ : StartBB(BB), Type(Type), EHState(-1), HandlerBlockOrFunc(nullptr) {}
+
+ ActionType getType() const { return Type; }
+ BasicBlock *getStartBlock() const { return StartBB; }
+
+ bool hasBeenProcessed() { return HandlerBlockOrFunc != nullptr; }
+
+ void setHandlerBlockOrFunc(Constant *F) { HandlerBlockOrFunc = F; }
+ Constant *getHandlerBlockOrFunc() { return HandlerBlockOrFunc; }
+
+ void setEHState(int State) { EHState = State; }
+ int getEHState() const { return EHState; }
+
+private:
+ BasicBlock *StartBB;
+ ActionType Type;
+ int EHState;
+
+ // Can be either a BlockAddress or a Function depending on the EH personality.
+ Constant *HandlerBlockOrFunc;
+};
+
+class CatchHandler : public ActionHandler {
+public:
+ CatchHandler(BasicBlock *BB, Constant *Selector, BasicBlock *NextBB)
+ : ActionHandler(BB, ActionType::Catch), Selector(Selector),
+ NextBB(NextBB), ExceptionObjectVar(nullptr),
+ ExceptionObjectIndex(-1) {}
+
+ // Method for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const ActionHandler *H) {
+ return H->getType() == ActionType::Catch;
+ }
+
+ Constant *getSelector() const { return Selector; }
+ BasicBlock *getNextBB() const { return NextBB; }
+
+ const Value *getExceptionVar() { return ExceptionObjectVar; }
+ TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
+
+ void setExceptionVar(const Value *Val) { ExceptionObjectVar = Val; }
+ void setExceptionVarIndex(int Index) { ExceptionObjectIndex = Index; }
+ int getExceptionVarIndex() const { return ExceptionObjectIndex; }
+ void setReturnTargets(TinyPtrVector<BasicBlock *> &Targets) {
+ ReturnTargets = Targets;
+ }
+
+private:
+ Constant *Selector;
+ BasicBlock *NextBB;
+ // While catch handlers are being outlined the ExceptionObjectVar field will
+ // be populated with the instruction in the parent frame that corresponds
+ // to the exception object (or nullptr if the catch does not use an
+ // exception object) and the ExceptionObjectIndex field will be -1.
+ // When the parseEHActions function is called to populate a vector of
+ // instances of this class, the ExceptionObjectVar field will be nullptr
+ // and the ExceptionObjectIndex will be the index of the exception object in
+ // the parent function's frameescape block.
+ const Value *ExceptionObjectVar;
+ int ExceptionObjectIndex;
+ TinyPtrVector<BasicBlock *> ReturnTargets;
+};
+
+class CleanupHandler : public ActionHandler {
+public:
+ CleanupHandler(BasicBlock *BB) : ActionHandler(BB, ActionType::Cleanup) {}
+
+ // Method for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const ActionHandler *H) {
+ return H->getType() == ActionType::Cleanup;
+ }
+};
+
+void parseEHActions(const IntrinsicInst *II,
+ SmallVectorImpl<ActionHandler *> &Actions);
+
+
+// The following structs represent the .xdata for functions using C++
+// exceptions on Windows.
+
+struct WinEHUnwindMapEntry {
+ int ToState;
+ Function *Cleanup;
+};
+
+struct WinEHHandlerType {
+ int Adjectives;
+ GlobalVariable *TypeDescriptor;
+ int CatchObjRecoverIdx;
+ Function *Handler;
+};
+
+struct WinEHTryBlockMapEntry {
+ int TryLow;
+ int TryHigh;
+ SmallVector<WinEHHandlerType, 1> HandlerArray;
+};
+
+struct WinEHFuncInfo {
+ DenseMap<const LandingPadInst *, int> LandingPadStateMap;
+ DenseMap<const Function *, int> CatchHandlerParentFrameObjIdx;
+ DenseMap<const Function *, int> CatchHandlerParentFrameObjOffset;
+ DenseMap<const Function *, int> CatchHandlerMaxState;
+ SmallVector<WinEHUnwindMapEntry, 4> UnwindMap;
+ SmallVector<WinEHTryBlockMapEntry, 4> TryBlockMap;
+ SmallVector<std::pair<MCSymbol *, int>, 4> IPToStateList;
+ int UnwindHelpFrameIdx;
+ int UnwindHelpFrameOffset;
+
+ unsigned NumIPToStateFuncsVisited;
+
+ WinEHFuncInfo()
+ : UnwindHelpFrameIdx(INT_MAX), UnwindHelpFrameOffset(-1),
+ NumIPToStateFuncsVisited(0) {}
+};
+
+}
+#endif // LLVM_CODEGEN_WINEHFUNCINFO_H
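// A hedged sketch (not part of this patch) of consuming parseEHActions, which
// the header above declares: the classof hooks on CatchHandler and
// CleanupHandler let a caller dispatch on the action kind with dyn_cast. The
// intrinsic call II is assumed to be an llvm.eh.actions call produced by
// WinEH preparation, and the delete assumes the caller owns the handlers.
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/Support/Casting.h"
#include <cassert>
using namespace llvm;

static void visitEHActionsSketch(const IntrinsicInst *II) {
  SmallVector<ActionHandler *, 4> Actions;
  parseEHActions(II, Actions);
  for (ActionHandler *AH : Actions) {
    if (auto *CH = dyn_cast<CatchHandler>(AH)) {
      // A catch action: the selector identifies the caught exception type.
      (void)CH->getSelector();
    } else {
      // Otherwise this is a cleanup action, which carries no extra payload.
      assert(isa<CleanupHandler>(AH));
    }
    delete AH; // ownership convention assumed for this sketch
  }
}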
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 29fa4a3..2b377a9 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -46,7 +46,7 @@ protected:
const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
StringRef SOS, StringRef AOS, bool isLittleEndian) = 0;
- ~DWARFUnitSectionBase() {}
+ ~DWARFUnitSectionBase() = default;
};
/// Concrete instance of DWARFUnitSection, specialized for one Unit type.
diff --git a/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h b/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
index 96ce12f..b5fa8c3 100644
--- a/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
+++ b/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
@@ -22,7 +22,7 @@ public:
ConcreteSymbolEnumerator(std::unique_ptr<IPDBEnumSymbols> SymbolEnumerator)
: Enumerator(std::move(SymbolEnumerator)) {}
- virtual ~ConcreteSymbolEnumerator() {}
+ ~ConcreteSymbolEnumerator() override {}
uint32_t getChildCount() const override {
return Enumerator->getChildCount();
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index abdaa0c..4b2add8 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -15,6 +15,7 @@
#ifndef LLVM_EXECUTIONENGINE_EXECUTIONENGINE_H
#define LLVM_EXECUTIONENGINE_EXECUTIONENGINE_H
+#include "RuntimeDyld.h"
#include "llvm-c/ExecutionEngine.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -42,6 +43,7 @@ class GlobalVariable;
class GlobalValue;
class JITEventListener;
class MachineCodeInfo;
+class MCJITMemoryManager;
class MutexGuard;
class ObjectCache;
class RTDyldMemoryManager;
@@ -57,46 +59,34 @@ namespace object {
/// table. Access to this class should be serialized under a mutex.
class ExecutionEngineState {
public:
- struct AddressMapConfig : public ValueMapConfig<const GlobalValue*> {
- typedef ExecutionEngineState *ExtraData;
- static sys::Mutex *getMutex(ExecutionEngineState *EES);
- static void onDelete(ExecutionEngineState *EES, const GlobalValue *Old);
- static void onRAUW(ExecutionEngineState *, const GlobalValue *,
- const GlobalValue *);
- };
-
- typedef ValueMap<const GlobalValue *, void *, AddressMapConfig>
- GlobalAddressMapTy;
+ typedef StringMap<uint64_t> GlobalAddressMapTy;
private:
- ExecutionEngine &EE;
- /// GlobalAddressMap - A mapping between LLVM global values and their
- /// actualized version...
+ /// GlobalAddressMap - A mapping between LLVM global symbol names and
+ /// their actualized version...
GlobalAddressMapTy GlobalAddressMap;
/// GlobalAddressReverseMap - This is the reverse mapping of GlobalAddressMap,
/// used to convert raw addresses into the LLVM global value that is emitted
/// at the address. This map is not computed unless getGlobalValueAtAddress
/// is called at some point.
- std::map<void *, AssertingVH<const GlobalValue> > GlobalAddressReverseMap;
+ std::map<uint64_t, std::string> GlobalAddressReverseMap;
public:
- ExecutionEngineState(ExecutionEngine &EE);
GlobalAddressMapTy &getGlobalAddressMap() {
return GlobalAddressMap;
}
- std::map<void*, AssertingVH<const GlobalValue> > &
- getGlobalAddressReverseMap() {
+ std::map<uint64_t, std::string> &getGlobalAddressReverseMap() {
return GlobalAddressReverseMap;
}
/// \brief Erase an entry from the mapping table.
///
/// \returns The address that \p ToUnmap was mapped to.
- void *RemoveMapping(const GlobalValue *ToUnmap);
+ uint64_t RemoveMapping(StringRef Name);
};
/// \brief Abstract interface for implementation execution of LLVM modules,
@@ -139,15 +129,17 @@ protected:
virtual char *getMemoryForGV(const GlobalVariable *GV);
static ExecutionEngine *(*MCJITCtor)(
- std::unique_ptr<Module> M,
- std::string *ErrorStr,
- std::unique_ptr<RTDyldMemoryManager> MCJMM,
- std::unique_ptr<TargetMachine> TM);
+ std::unique_ptr<Module> M,
+ std::string *ErrorStr,
+ std::shared_ptr<MCJITMemoryManager> MM,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> SR,
+ std::unique_ptr<TargetMachine> TM);
static ExecutionEngine *(*OrcMCJITReplacementCtor)(
- std::string *ErrorStr,
- std::unique_ptr<RTDyldMemoryManager> OrcJMM,
- std::unique_ptr<TargetMachine> TM);
+ std::string *ErrorStr,
+ std::shared_ptr<MCJITMemoryManager> MM,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> SR,
+ std::unique_ptr<TargetMachine> TM);
static ExecutionEngine *(*InterpCtor)(std::unique_ptr<Module> M,
std::string *ErrorStr);
@@ -157,6 +149,9 @@ protected:
/// abort.
void *(*LazyFunctionCreator)(const std::string &);
+ /// getMangledName - Get mangled name.
+ std::string getMangledName(const GlobalValue *GV);
+
public:
/// lock - This lock protects the ExecutionEngine and MCJIT classes. It must
/// be held while changing the internal state of any of those classes.
@@ -228,7 +223,8 @@ public:
/// Map the address of a JIT section as returned from the memory manager
/// to the address in the target process as the running code will see it.
/// This is the address which will be used for relocation resolution.
- virtual void mapSectionAddress(const void *LocalAddress, uint64_t TargetAddress) {
+ virtual void mapSectionAddress(const void *LocalAddress,
+ uint64_t TargetAddress) {
llvm_unreachable("Re-mapping of section addresses not supported with this "
"EE!");
}
@@ -286,6 +282,7 @@ public:
/// existing data in memory. Mappings are automatically removed when their
/// GlobalValue is destroyed.
void addGlobalMapping(const GlobalValue *GV, void *Addr);
+ void addGlobalMapping(StringRef Name, uint64_t Addr);
/// clearAllGlobalMappings - Clear all global mappings and start over again,
/// for use in dynamic compilation scenarios to move globals.
@@ -299,14 +296,17 @@ public:
/// address. This updates both maps as required. If "Addr" is null, the
/// entry for the global is removed from the mappings. This returns the old
/// value of the pointer, or null if it was not in the map.
- void *updateGlobalMapping(const GlobalValue *GV, void *Addr);
+ uint64_t updateGlobalMapping(const GlobalValue *GV, void *Addr);
+ uint64_t updateGlobalMapping(StringRef Name, uint64_t Addr);
+
+ /// getAddressToGlobalIfAvailable - This returns the address of the specified
+ /// global symbol.
+ uint64_t getAddressToGlobalIfAvailable(StringRef S);
/// getPointerToGlobalIfAvailable - This returns the address of the specified
/// global value if it has already been codegen'd, otherwise it returns
/// null.
- ///
- /// This function is deprecated for the MCJIT execution engine. It doesn't
- /// seem to be needed in that case, but an equivalent can be added if it is.
+ void *getPointerToGlobalIfAvailable(StringRef S);
void *getPointerToGlobalIfAvailable(const GlobalValue *GV);
/// getPointerToGlobal - This returns the address of the specified global
@@ -470,7 +470,7 @@ public:
}
protected:
- ExecutionEngine() : EEState(*this) {}
+ ExecutionEngine() {}
explicit ExecutionEngine(std::unique_ptr<Module> M);
void emitGlobals();
@@ -500,7 +500,8 @@ private:
EngineKind::Kind WhichEngine;
std::string *ErrorStr;
CodeGenOpt::Level OptLevel;
- std::unique_ptr<RTDyldMemoryManager> MCJMM;
+ std::shared_ptr<MCJITMemoryManager> MemMgr;
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver;
TargetOptions Options;
Reloc::Model RelocModel;
CodeModel::Model CMModel;
@@ -535,6 +536,12 @@ public:
/// memory manager. This option defaults to NULL.
EngineBuilder &setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager> mcjmm);
+ EngineBuilder&
+ setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM);
+
+ EngineBuilder&
+ setSymbolResolver(std::unique_ptr<RuntimeDyld::SymbolResolver> SR);
+
/// setErrorStr - Set the error string to write to on error. This option
/// defaults to NULL.
EngineBuilder &setErrorStr(std::string *e) {
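// A sketch (not part of this patch) of the new string-keyed mapping interface
// shown above. The engine EE is assumed to have been created elsewhere (for
// example via EngineBuilder), and "external_flag" must match the mangled
// symbol name the JIT'd code refers to; the name and variable are
// illustrative only.
#include "llvm/ExecutionEngine/ExecutionEngine.h"

static int ExternalFlag;

static void mapExternalFlagSketch(llvm::ExecutionEngine &EE) {
  // Register the host variable under its symbol name.
  EE.addGlobalMapping("external_flag",
                      reinterpret_cast<uint64_t>(&ExternalFlag));
  // The same table can then be queried back by name.
  uint64_t Addr = EE.getAddressToGlobalIfAvailable("external_flag");
  (void)Addr;
}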
diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 77b0c48..30f7f1c 100644
--- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -16,7 +16,7 @@
#define LLVM_EXECUTIONENGINE_ORC_COMPILEONDEMANDLAYER_H
#include "IndirectionUtils.h"
-#include "LookasideRTDyldMM.h"
+#include "LambdaResolver.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include <list>
@@ -36,7 +36,7 @@ namespace orc {
/// compiled only when it is first called.
template <typename BaseLayerT, typename CompileCallbackMgrT>
class CompileOnDemandLayer {
-public:
+private:
/// @brief Lookup helper that provides compatibility with the classic
/// static-compilation symbol resolution process.
///
@@ -64,6 +64,8 @@ public:
/// @brief Construct a scoped lookup.
CODScopedLookup(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {}
+ virtual ~CODScopedLookup() {}
+
/// @brief Start a new context for a single logical module.
LMHandle createLogicalModule() {
Handles.push_back(SiblingHandlesList());
@@ -92,6 +94,10 @@ public:
return nullptr;
}
+ /// @brief Find an external symbol (via the user supplied SymbolResolver).
+ virtual RuntimeDyld::SymbolInfo
+ externalLookup(const std::string &Name) const = 0;
+
private:
JITSymbol findSymbolIn(LMHandle LMH, const std::string &Name) {
@@ -105,7 +111,29 @@ public:
PseudoDylibModuleSetHandlesList Handles;
};
-private:
+ template <typename ResolverPtrT>
+ class CODScopedLookupImpl : public CODScopedLookup {
+ public:
+ CODScopedLookupImpl(BaseLayerT &BaseLayer, ResolverPtrT Resolver)
+ : CODScopedLookup(BaseLayer), Resolver(std::move(Resolver)) {}
+
+ RuntimeDyld::SymbolInfo
+ externalLookup(const std::string &Name) const override {
+ return Resolver->findSymbol(Name);
+ }
+
+ private:
+ ResolverPtrT Resolver;
+ };
+
+ template <typename ResolverPtrT>
+ static std::shared_ptr<CODScopedLookup>
+ createCODScopedLookup(BaseLayerT &BaseLayer,
+ ResolverPtrT Resolver) {
+ typedef CODScopedLookupImpl<ResolverPtrT> Impl;
+ return std::make_shared<Impl>(BaseLayer, std::move(Resolver));
+ }
+
typedef typename BaseLayerT::ModuleSetHandleT BaseLayerModuleSetHandleT;
typedef std::vector<BaseLayerModuleSetHandleT> BaseLayerModuleSetHandleListT;
@@ -138,36 +166,31 @@ public:
/// @brief Handle to a set of loaded modules.
typedef typename ModuleSetInfoListT::iterator ModuleSetHandleT;
- // @brief Fallback lookup functor.
- typedef std::function<uint64_t(const std::string &)> LookupFtor;
-
/// @brief Construct a compile-on-demand layer instance.
CompileOnDemandLayer(BaseLayerT &BaseLayer, CompileCallbackMgrT &CallbackMgr)
: BaseLayer(BaseLayer), CompileCallbackMgr(CallbackMgr) {}
/// @brief Add a module to the compile-on-demand layer.
- template <typename ModuleSetT>
+ template <typename ModuleSetT, typename MemoryManagerPtrT,
+ typename SymbolResolverPtrT>
ModuleSetHandleT addModuleSet(ModuleSetT Ms,
- LookupFtor FallbackLookup = nullptr) {
+ MemoryManagerPtrT MemMgr,
+ SymbolResolverPtrT Resolver) {
- // If the user didn't supply a fallback lookup then just use
- // getSymbolAddress.
- if (!FallbackLookup)
- FallbackLookup = [=](const std::string &Name) {
- return findSymbol(Name, true).getAddress();
- };
+ assert(MemMgr == nullptr &&
+ "User supplied memory managers not supported with COD yet.");
// Create a lookup context and ModuleSetInfo for this module set.
// For the purposes of symbol resolution the set Ms will be treated as if
// the modules it contained had been linked together as a dylib.
- auto DylibLookup = std::make_shared<CODScopedLookup>(BaseLayer);
+ auto DylibLookup = createCODScopedLookup(BaseLayer, std::move(Resolver));
ModuleSetHandleT H =
ModuleSetInfos.insert(ModuleSetInfos.end(), ModuleSetInfo(DylibLookup));
ModuleSetInfo &MSI = ModuleSetInfos.back();
// Process each of the modules in this module set.
for (auto &M : Ms)
- partitionAndAdd(*M, MSI, FallbackLookup);
+ partitionAndAdd(*M, MSI);
return H;
}
@@ -203,8 +226,7 @@ public:
private:
- void partitionAndAdd(Module &M, ModuleSetInfo &MSI,
- LookupFtor FallbackLookup) {
+ void partitionAndAdd(Module &M, ModuleSetInfo &MSI) {
const char *AddrSuffix = "$orc_addr";
const char *BodySuffix = "$orc_body";
@@ -224,8 +246,7 @@ private:
auto FunctionModules = std::move(PartitionedModule.Functions);
// Emit the commons straight away.
- auto CommonHandle = addModule(std::move(CommonsModule), MSI, LogicalModule,
- FallbackLookup);
+ auto CommonHandle = addModule(std::move(CommonsModule), MSI, LogicalModule);
BaseLayer.emitAndFinalize(CommonHandle);
// Map of definition names to callback-info data structures. We'll use
@@ -255,10 +276,12 @@ private:
Function *Proto = StubsModule->getFunction(Name);
assert(Proto && "Failed to clone function decl into stubs module.");
auto CallbackInfo =
- CompileCallbackMgr.getCompileCallback(*Proto->getFunctionType());
+ CompileCallbackMgr.getCompileCallback(Proto->getContext());
GlobalVariable *FunctionBodyPointer =
- createImplPointer(*Proto, Name + AddrSuffix,
- CallbackInfo.getAddress());
+ createImplPointer(*Proto->getType(), *Proto->getParent(),
+ Name + AddrSuffix,
+ createIRTypedAddress(*Proto->getFunctionType(),
+ CallbackInfo.getAddress()));
makeStub(*Proto, *FunctionBodyPointer);
F.setName(Name + BodySuffix);
@@ -268,7 +291,7 @@ private:
NewStubInfos.push_back(StubInfos.insert(StubInfos.begin(), KV));
}
- auto H = addModule(std::move(SubM), MSI, LogicalModule, FallbackLookup);
+ auto H = addModule(std::move(SubM), MSI, LogicalModule);
// Set the compile actions for this module:
for (auto &KVPair : NewStubInfos) {
@@ -286,7 +309,7 @@ private:
// Ok - we've processed all the partitioned modules. Now add the
// stubs/globals module and set the update actions.
auto StubsH =
- addModule(std::move(StubsModule), MSI, LogicalModule, FallbackLookup);
+ addModule(std::move(StubsModule), MSI, LogicalModule);
for (auto &KVPair : StubInfos) {
std::string AddrName = Mangle(KVPair.first + AddrSuffix,
@@ -304,8 +327,7 @@ private:
BaseLayerModuleSetHandleT addModule(
std::unique_ptr<Module> M,
ModuleSetInfo &MSI,
- typename CODScopedLookup::LMHandle LogicalModule,
- LookupFtor FallbackLookup) {
+ typename CODScopedLookup::LMHandle LogicalModule) {
// Add this module to the JIT with a memory manager that uses the
// DylibLookup to resolve symbols.
@@ -313,19 +335,25 @@ private:
MSet.push_back(std::move(M));
auto DylibLookup = MSI.Lookup;
- auto MM =
- createLookasideRTDyldMM<SectionMemoryManager>(
+ auto Resolver =
+ createLambdaResolver(
[=](const std::string &Name) {
if (auto Symbol = DylibLookup->findSymbol(LogicalModule, Name))
- return Symbol.getAddress();
- return FallbackLookup(Name);
+ return RuntimeDyld::SymbolInfo(Symbol.getAddress(),
+ Symbol.getFlags());
+ return DylibLookup->externalLookup(Name);
},
- [=](const std::string &Name) {
- return DylibLookup->findSymbol(LogicalModule, Name).getAddress();
+ [=](const std::string &Name) -> RuntimeDyld::SymbolInfo {
+ if (auto Symbol = DylibLookup->findSymbol(LogicalModule, Name))
+ return RuntimeDyld::SymbolInfo(Symbol.getAddress(),
+ Symbol.getFlags());
+ return nullptr;
});
BaseLayerModuleSetHandleT H =
- BaseLayer.addModuleSet(std::move(MSet), std::move(MM));
+ BaseLayer.addModuleSet(std::move(MSet),
+ make_unique<SectionMemoryManager>(),
+ std::move(Resolver));
// Add this module to the logical module lookup.
DylibLookup->addToLogicalModule(LogicalModule, H);
MSI.BaseLayerModuleSetHandles.push_back(H);
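// A sketch (not part of this patch) of the new addModuleSet signature on the
// compile-on-demand layer: callers now pass a memory manager (which must be
// null here, per the assert above) and a RuntimeDyld::SymbolResolver
// explicitly. CODLayerT and the resolver are assumed to come from the client;
// see the LambdaResolver sketch further below for one way to build a resolver.
#include "llvm/IR/Module.h"
#include <memory>
#include <vector>

template <typename CODLayerT, typename ResolverPtrT>
static typename CODLayerT::ModuleSetHandleT
addSingleModuleSketch(CODLayerT &CODLayer, std::unique_ptr<llvm::Module> M,
                      ResolverPtrT Resolver) {
  std::vector<std::unique_ptr<llvm::Module>> Ms;
  Ms.push_back(std::move(M));
  // Null memory manager: the COD layer supplies its own SectionMemoryManager.
  return CODLayer.addModuleSet(std::move(Ms), nullptr, std::move(Resolver));
}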
diff --git a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
new file mode 100644
index 0000000..c10508c
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
@@ -0,0 +1,182 @@
+//===-- ExecutionUtils.h - Utilities for executing code in Orc --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains utilities for executing code in Orc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H
+#define LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H
+
+#include "JITSymbol.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include <vector>
+
+namespace llvm {
+
+class ConstantArray;
+class GlobalVariable;
+class Function;
+class Module;
+class Value;
+
+namespace orc {
+
+/// @brief This iterator provides a convenient way to iterate over the elements
+/// of an llvm.global_ctors/llvm.global_dtors instance.
+///
+/// The easiest way to get hold of instances of this class is to use the
+/// getConstructors/getDestructors functions.
+class CtorDtorIterator {
+public:
+
+ /// @brief Accessor for an element of the global_ctors/global_dtors array.
+ ///
+ /// This class provides a read-only view of the element with any casts on
+ /// the function stripped away.
+ struct Element {
+ Element(unsigned Priority, const Function *Func, const Value *Data)
+ : Priority(Priority), Func(Func), Data(Data) {}
+
+ unsigned Priority;
+ const Function *Func;
+ const Value *Data;
+ };
+
+ /// @brief Construct an iterator instance. If End is true then this iterator
+ /// acts as the end of the range, otherwise it is the beginning.
+ CtorDtorIterator(const GlobalVariable *GV, bool End);
+
+ /// @brief Test iterators for equality.
+ bool operator==(const CtorDtorIterator &Other) const;
+
+ /// @brief Test iterators for inequality.
+ bool operator!=(const CtorDtorIterator &Other) const;
+
+ /// @brief Pre-increment iterator.
+ CtorDtorIterator& operator++();
+
+ /// @brief Post-increment iterator.
+ CtorDtorIterator operator++(int);
+
+ /// @brief Dereference iterator. The resulting value provides a read-only view
+ /// of this element of the global_ctors/global_dtors list.
+ Element operator*() const;
+
+private:
+ const ConstantArray *InitList;
+ unsigned I;
+};
+
+/// @brief Create an iterator range over the entries of the llvm.global_ctors
+/// array.
+iterator_range<CtorDtorIterator> getConstructors(const Module &M);
+
+/// @brief Create an iterator range over the entries of the llvm.global_dtors
+/// array.
+iterator_range<CtorDtorIterator> getDestructors(const Module &M);
+
+/// @brief Convenience class for recording constructor/destructor names for
+/// later execution.
+template <typename JITLayerT>
+class CtorDtorRunner {
+public:
+
+ /// @brief Construct a CtorDtorRunner for the given list of constructor or
+ /// destructor names and module set handle.
+ CtorDtorRunner(std::vector<std::string> CtorDtorNames,
+ typename JITLayerT::ModuleSetHandleT H)
+ : CtorDtorNames(std::move(CtorDtorNames)), H(H) {}
+
+ /// @brief Run the recorded constructors/destructors through the given JIT
+ /// layer.
+ bool runViaLayer(JITLayerT &JITLayer) const {
+ typedef void (*CtorDtorTy)();
+
+ bool Error = false;
+ for (const auto &CtorDtorName : CtorDtorNames)
+ if (auto CtorDtorSym = JITLayer.findSymbolIn(H, CtorDtorName, false)) {
+ CtorDtorTy CtorDtor =
+ reinterpret_cast<CtorDtorTy>(
+ static_cast<uintptr_t>(CtorDtorSym.getAddress()));
+ CtorDtor();
+ } else
+ Error = true;
+ return !Error;
+ }
+
+private:
+ std::vector<std::string> CtorDtorNames;
+ typename JITLayerT::ModuleSetHandleT H;
+};
+
+/// @brief Support class for static dtor execution. For hosted (in-process) JITs
+/// only!
+///
+/// If a __cxa_atexit function isn't found C++ programs that use static
+/// destructors will fail to link. However, we don't want to use the host
+/// process's __cxa_atexit, because it will schedule JIT'd destructors to run
+/// after the JIT has been torn down, which is no good. This class makes it easy
+/// to override __cxa_atexit (and the related __dso_handle).
+///
+/// To use, clients should manually call searchOverrides from their symbol
+/// resolver. This should generally be done after attempting symbol resolution
+/// inside the JIT, but before searching the host process's symbol table. When
+/// the client determines that destructors should be run (generally at JIT
+/// teardown or after a return from main), the runDestructors method should be
+/// called.
+class LocalCXXRuntimeOverrides {
+public:
+
+ /// Create a runtime-overrides class.
+ template <typename MangleFtorT>
+ LocalCXXRuntimeOverrides(const MangleFtorT &Mangle) {
+ addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride));
+ addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride));
+ }
+
+ /// Search overridden symbols.
+ RuntimeDyld::SymbolInfo searchOverrides(const std::string &Name) {
+ auto I = CXXRuntimeOverrides.find(Name);
+ if (I != CXXRuntimeOverrides.end())
+ return RuntimeDyld::SymbolInfo(I->second, JITSymbolFlags::Exported);
+ return nullptr;
+ }
+
+ /// Run any destructors recorded by the overridden __cxa_atexit function
+ /// (CXAAtExitOverride).
+ void runDestructors();
+
+private:
+
+ template <typename PtrTy>
+ TargetAddress toTargetAddress(PtrTy* P) {
+ return static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(P));
+ }
+
+ void addOverride(const std::string &Name, TargetAddress Addr) {
+ CXXRuntimeOverrides.insert(std::make_pair(Name, Addr));
+ }
+
+ StringMap<TargetAddress> CXXRuntimeOverrides;
+
+ typedef void (*DestructorPtr)(void*);
+ typedef std::pair<DestructorPtr, void*> CXXDestructorDataPair;
+ typedef std::vector<CXXDestructorDataPair> CXXDestructorDataPairList;
+ CXXDestructorDataPairList DSOHandleOverride;
+ static int CXAAtExitOverride(DestructorPtr Destructor, void *Arg,
+ void *DSOHandle);
+};
+
+} // End namespace orc.
+} // End namespace llvm.
+
+#endif // LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H
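// A sketch (not part of this patch) of the intended flow for the utilities
// above: record a module's static constructor names before handing it to a
// JIT layer, then run them once the module set has been emitted. JITLayerT,
// the handle H, and the use of unmangled names are assumptions of this
// sketch; a real client would also route
// LocalCXXRuntimeOverrides::searchOverrides through its symbol resolver as
// the class comment describes.
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

template <typename JITLayerT>
static bool runStaticCtorsSketch(JITLayerT &JITLayer,
                                 typename JITLayerT::ModuleSetHandleT H,
                                 const llvm::Module &M) {
  std::vector<std::string> CtorNames;
  for (const auto &Ctor : llvm::orc::getConstructors(M))
    if (Ctor.Func)
      CtorNames.push_back(Ctor.Func->getName().str());
  llvm::orc::CtorDtorRunner<JITLayerT> Runner(std::move(CtorNames), H);
  // Looks each recorded name up in H via findSymbolIn and calls it.
  return Runner.runViaLayer(JITLayer);
}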
diff --git a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
index 6a47622..6379022 100644
--- a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
@@ -52,14 +52,16 @@ public:
/// @brief Set an ObjectCache to query before compiling.
void setObjectCache(ObjectCache *NewCache) { ObjCache = NewCache; }
- /// @brief Compile each module in the given module set, then then add the
- /// resulting set of objects to the base layer, along with the memory
- // manager MM.
+ /// @brief Compile each module in the given module set, then add the resulting
+ /// set of objects to the base layer along with the memory manager and
+ /// symbol resolver.
///
/// @return A handle for the added modules.
- template <typename ModuleSetT>
+ template <typename ModuleSetT, typename MemoryManagerPtrT,
+ typename SymbolResolverPtrT>
ModuleSetHandleT addModuleSet(ModuleSetT Ms,
- std::unique_ptr<RTDyldMemoryManager> MM) {
+ MemoryManagerPtrT MemMgr,
+ SymbolResolverPtrT Resolver) {
OwningObjectVec Objects;
OwningBufferVec Buffers;
@@ -81,7 +83,7 @@ public:
}
ModuleSetHandleT H =
- BaseLayer.addObjectSet(Objects, std::move(MM));
+ BaseLayer.addObjectSet(Objects, std::move(MemMgr), std::move(Resolver));
BaseLayer.takeOwnershipOfBuffers(H, std::move(Buffers));
diff --git a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
new file mode 100644
index 0000000..4dabb9a
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
@@ -0,0 +1,101 @@
+//===----- IRTransformLayer.h - Run all IR through a functor ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Run all IR passed in through a user supplied functor.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_IRTRANSFORMLAYER_H
+#define LLVM_EXECUTIONENGINE_ORC_IRTRANSFORMLAYER_H
+
+#include "JITSymbol.h"
+
+namespace llvm {
+namespace orc {
+
+/// @brief IR mutating layer.
+///
+/// This layer accepts sets of LLVM IR Modules (via addModuleSet). It
+/// immediately applies the user supplied functor to each module, then adds
+/// the set of transformed modules to the layer below.
+template <typename BaseLayerT, typename TransformFtor>
+class IRTransformLayer {
+public:
+ /// @brief Handle to a set of added modules.
+ typedef typename BaseLayerT::ModuleSetHandleT ModuleSetHandleT;
+
+ /// @brief Construct an IRTransformLayer with the given BaseLayer
+ IRTransformLayer(BaseLayerT &BaseLayer,
+ TransformFtor Transform = TransformFtor())
+ : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+
+ /// @brief Apply the transform functor to each module in the module set, then
+ /// add the resulting set of modules to the base layer, along with the
+ /// memory manager and symbol resolver.
+ ///
+ /// @return A handle for the added modules.
+ template <typename ModuleSetT, typename MemoryManagerPtrT,
+ typename SymbolResolverPtrT>
+ ModuleSetHandleT addModuleSet(ModuleSetT Ms,
+ MemoryManagerPtrT MemMgr,
+ SymbolResolverPtrT Resolver) {
+
+ for (auto I = Ms.begin(), E = Ms.end(); I != E; ++I)
+ *I = Transform(std::move(*I));
+
+ return BaseLayer.addModuleSet(std::move(Ms), std::move(MemMgr),
+ std::move(Resolver));
+ }
+
+ /// @brief Remove the module set associated with the handle H.
+ void removeModuleSet(ModuleSetHandleT H) { BaseLayer.removeModuleSet(H); }
+
+ /// @brief Search for the given named symbol.
+ /// @param Name The name of the symbol to search for.
+ /// @param ExportedSymbolsOnly If true, search only for exported symbols.
+ /// @return A handle for the given named symbol, if it exists.
+ JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
+ return BaseLayer.findSymbol(Name, ExportedSymbolsOnly);
+ }
+
+ /// @brief Get the address of the given symbol in the context of the set of
+ /// modules represented by the handle H. This call is forwarded to the
+ /// base layer's implementation.
+ /// @param H The handle for the module set to search in.
+ /// @param Name The name of the symbol to search for.
+ /// @param ExportedSymbolsOnly If true, search only for exported symbols.
+ /// @return A handle for the given named symbol, if it is found in the
+ /// given module set.
+ JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name,
+ bool ExportedSymbolsOnly) {
+ return BaseLayer.findSymbolIn(H, Name, ExportedSymbolsOnly);
+ }
+
+ /// @brief Immediately emit and finalize the module set represented by the
+ /// given handle.
+ /// @param H Handle for module set to emit/finalize.
+ void emitAndFinalize(ModuleSetHandleT H) {
+ BaseLayer.emitAndFinalize(H);
+ }
+
+ /// @brief Access the transform functor directly.
+ TransformFtor& getTransform() { return Transform; }
+
+ /// @brief Access the transform functor directly.
+ const TransformFtor& getTransform() const { return Transform; }
+
+private:
+ BaseLayerT &BaseLayer;
+ TransformFtor Transform;
+};
+
+} // End namespace orc.
+} // End namespace llvm.
+
+#endif // LLVM_EXECUTIONENGINE_ORC_IRTRANSFORMLAYER_H
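// A sketch (not part of this patch) of stacking the transform layer above on
// an existing base layer: the functor receives each module as a
// std::unique_ptr, may mutate or replace it, and hands it back. BaseLayerT is
// assumed to be a lower Orc layer (for example an IRCompileLayer); dumping
// the IR is purely illustrative.
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

template <typename BaseLayerT>
static void addDumpingLayerSketch(BaseLayerT &BaseLayer) {
  auto DumpTransform = [](std::unique_ptr<llvm::Module> M) {
    M->print(llvm::errs(), nullptr); // inspect the IR before it is compiled
    return M;
  };
  llvm::orc::IRTransformLayer<BaseLayerT, decltype(DumpTransform)>
      TransformLayer(BaseLayer, DumpTransform);
  (void)TransformLayer; // a real JIT keeps this layer alive and adds module
                        // sets through it instead of through BaseLayer
}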
diff --git a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index 8ce1d4d..7b4f611 100644
--- a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -16,6 +16,7 @@
#include "JITSymbol.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
@@ -37,11 +38,11 @@ public:
/// the compile and update actions for the callback.
class CompileCallbackInfo {
public:
- CompileCallbackInfo(Constant *Addr, CompileFtor &Compile,
+ CompileCallbackInfo(TargetAddress Addr, CompileFtor &Compile,
UpdateFtor &Update)
: Addr(Addr), Compile(Compile), Update(Update) {}
- Constant* getAddress() const { return Addr; }
+ TargetAddress getAddress() const { return Addr; }
void setCompileAction(CompileFtor Compile) {
this->Compile = std::move(Compile);
}
@@ -49,7 +50,7 @@ public:
this->Update = std::move(Update);
}
private:
- Constant *Addr;
+ TargetAddress Addr;
CompileFtor &Compile;
UpdateFtor &Update;
};
@@ -94,7 +95,7 @@ public:
}
/// @brief Get/create a compile callback with the given signature.
- virtual CompileCallbackInfo getCompileCallback(FunctionType &FT) = 0;
+ virtual CompileCallbackInfo getCompileCallback(LLVMContext &Context) = 0;
protected:
@@ -125,27 +126,23 @@ public:
/// there is no existing callback trampoline.
/// (Trampolines are allocated in blocks for
/// efficiency.)
- JITCompileCallbackManager(JITLayerT &JIT, LLVMContext &Context,
+ JITCompileCallbackManager(JITLayerT &JIT, RuntimeDyld::MemoryManager &MemMgr,
+ LLVMContext &Context,
TargetAddress ErrorHandlerAddress,
unsigned NumTrampolinesPerBlock)
: JITCompileCallbackManagerBase(ErrorHandlerAddress,
NumTrampolinesPerBlock),
- JIT(JIT) {
+ JIT(JIT), MemMgr(MemMgr) {
emitResolverBlock(Context);
}
/// @brief Get/create a compile callback with the given signature.
- CompileCallbackInfo getCompileCallback(FunctionType &FT) final {
- TargetAddress TrampolineAddr = getAvailableTrampolineAddr(FT.getContext());
+ CompileCallbackInfo getCompileCallback(LLVMContext &Context) final {
+ TargetAddress TrampolineAddr = getAvailableTrampolineAddr(Context);
auto &CallbackHandler =
this->ActiveTrampolines[TrampolineAddr];
- Constant *AddrIntVal =
- ConstantInt::get(Type::getInt64Ty(FT.getContext()), TrampolineAddr);
- Constant *AddrPtrVal =
- ConstantExpr::getCast(Instruction::IntToPtr, AddrIntVal,
- PointerType::get(&FT, 0));
- return CompileCallbackInfo(AddrPtrVal, CallbackHandler.Compile,
+ return CompileCallbackInfo(TrampolineAddr, CallbackHandler.Compile,
CallbackHandler.Update);
}
@@ -162,7 +159,9 @@ private:
std::unique_ptr<Module> M(new Module("resolver_block_module",
Context));
TargetT::insertResolverBlock(*M, *this);
- auto H = JIT.addModuleSet(SingletonSet(std::move(M)), nullptr);
+ auto H = JIT.addModuleSet(SingletonSet(std::move(M)), &MemMgr,
+ static_cast<RuntimeDyld::SymbolResolver*>(
+ nullptr));
JIT.emitAndFinalize(H);
auto ResolverBlockSymbol =
JIT.findSymbolIn(H, TargetT::ResolverBlockName, false);
@@ -187,7 +186,9 @@ private:
TargetT::insertCompileCallbackTrampolines(*M, ResolverBlockAddr,
this->NumTrampolinesPerBlock,
this->ActiveTrampolines.size());
- auto H = JIT.addModuleSet(SingletonSet(std::move(M)), nullptr);
+ auto H = JIT.addModuleSet(SingletonSet(std::move(M)), &MemMgr,
+ static_cast<RuntimeDyld::SymbolResolver*>(
+ nullptr));
JIT.emitAndFinalize(H);
for (unsigned I = 0; I < this->NumTrampolinesPerBlock; ++I) {
std::string Name = GetLabelName(I);
@@ -198,10 +199,11 @@ private:
}
JITLayerT &JIT;
+ RuntimeDyld::MemoryManager &MemMgr;
TargetAddress ResolverBlockAddr;
};
-/// @brief Get an update functor for updating the value of a named function
+/// @brief Get an update functor that updates the value of a named function
/// pointer.
template <typename JITLayerT>
JITCompileCallbackManagerBase::UpdateFtor
@@ -217,13 +219,26 @@ getLocalFPUpdater(JITLayerT &JIT, typename JITLayerT::ModuleSetHandleT H,
};
}
-GlobalVariable* createImplPointer(Function &F, const Twine &Name,
- Constant *Initializer);
+/// @brief Build a function pointer of FunctionType with the given constant
+/// address.
+///
+/// Usage example: Turn a trampoline address into a function pointer constant
+/// for use in a stub.
+Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr);
+/// @brief Create a function pointer with the given type, name, and initializer
+/// in the given Module.
+GlobalVariable* createImplPointer(PointerType &PT, Module &M,
+ const Twine &Name, Constant *Initializer);
+
+/// @brief Turn a function declaration into a stub function that makes an
+/// indirect call using the given function pointer.
void makeStub(Function &F, GlobalVariable &ImplPointer);
typedef std::map<Module*, DenseSet<const GlobalValue*>> ModulePartitionMap;
+/// @brief Extract subsections of a Module into the given Module according to
+/// the given ModulePartitionMap.
void partition(Module &M, const ModulePartitionMap &PMap);
/// @brief Struct for trivial "complete" partitioning of a module.
@@ -239,6 +254,7 @@ public:
Functions(std::move(S.Functions)) {}
};
+/// @brief Extract every function in M into a separate module.
FullyPartitionedModule fullyPartition(Module &M);
} // End namespace orc.
diff --git a/include/llvm/ExecutionEngine/Orc/JITSymbol.h b/include/llvm/ExecutionEngine/Orc/JITSymbol.h
index 7c3ad56..422a376 100644
--- a/include/llvm/ExecutionEngine/Orc/JITSymbol.h
+++ b/include/llvm/ExecutionEngine/Orc/JITSymbol.h
@@ -27,7 +27,7 @@ typedef uint64_t TargetAddress;
/// @brief Represents a symbol in the JIT.
class JITSymbol : public JITSymbolBase {
-public:
+public:
typedef std::function<TargetAddress()> GetAddressFtor;
diff --git a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
new file mode 100644
index 0000000..faa2365
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
@@ -0,0 +1,62 @@
+//===-- LambdaResolver.h - Redirect symbol lookup via a functor -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines a RuntimeDyld::SymbolResolver subclass that uses a user-supplied
+// functor for symbol resolution.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_LAMBDARESOLVER_H
+#define LLVM_EXECUTIONENGINE_ORC_LAMBDARESOLVER_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include <memory>
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+template <typename ExternalLookupFtorT, typename DylibLookupFtorT>
+class LambdaResolver : public RuntimeDyld::SymbolResolver {
+public:
+
+ LambdaResolver(ExternalLookupFtorT ExternalLookupFtor,
+ DylibLookupFtorT DylibLookupFtor)
+ : ExternalLookupFtor(ExternalLookupFtor),
+ DylibLookupFtor(DylibLookupFtor) {}
+
+ RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) final {
+ return ExternalLookupFtor(Name);
+ }
+
+ RuntimeDyld::SymbolInfo
+ findSymbolInLogicalDylib(const std::string &Name) final {
+ return DylibLookupFtor(Name);
+ }
+
+private:
+ ExternalLookupFtorT ExternalLookupFtor;
+ DylibLookupFtorT DylibLookupFtor;
+};
+
+template <typename ExternalLookupFtorT,
+ typename DylibLookupFtorT>
+std::unique_ptr<LambdaResolver<ExternalLookupFtorT, DylibLookupFtorT>>
+createLambdaResolver(ExternalLookupFtorT ExternalLookupFtor,
+ DylibLookupFtorT DylibLookupFtor) {
+ typedef LambdaResolver<ExternalLookupFtorT, DylibLookupFtorT> LR;
+ return make_unique<LR>(std::move(ExternalLookupFtor),
+ std::move(DylibLookupFtor));
+}
+
+} // End namespace orc.
+} // End namespace llvm.
+
+#endif // LLVM_EXECUTIONENGINE_ORC_LAMBDARESOLVER_H
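A minimal usage sketch for the new resolver (not part of the patch): the external lookup falls back to the host process, the dylib lookup treats every module set as separate. It assumes the SymbolInfo (address, flags) constructor used elsewhere in this change and the None/Exported values from JITSymbolFlags.h.

  auto Resolver = createLambdaResolver(
      [](const std::string &Name) -> RuntimeDyld::SymbolInfo {
        // External symbols: search the host process.
        if (uint64_t Addr = RTDyldMemoryManager::getSymbolAddressInProcess(Name))
          return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported);
        return RuntimeDyld::SymbolInfo(0, JITSymbolFlags::None);
      },
      [](const std::string &Name) -> RuntimeDyld::SymbolInfo {
        // Logical-dylib lookup: treat every module set as separate.
        return RuntimeDyld::SymbolInfo(0, JITSymbolFlags::None);
      });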
diff --git a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
index ac5fccf..71c83f7 100644
--- a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
@@ -15,11 +15,11 @@
#define LLVM_EXECUTIONENGINE_ORC_LAZYEMITTINGLAYER_H
#include "JITSymbol.h"
-#include "LookasideRTDyldMM.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include <list>
@@ -94,10 +94,11 @@ private:
BaseLayer.emitAndFinalize(Handle);
}
- template <typename ModuleSetT>
+ template <typename ModuleSetT, typename MemoryManagerPtrT,
+ typename SymbolResolverPtrT>
static std::unique_ptr<EmissionDeferredSet>
- create(BaseLayerT &B, ModuleSetT Ms,
- std::unique_ptr<RTDyldMemoryManager> MM);
+ create(BaseLayerT &B, ModuleSetT Ms, MemoryManagerPtrT MemMgr,
+ SymbolResolverPtrT Resolver);
protected:
virtual const GlobalValue* searchGVs(StringRef Name,
@@ -109,12 +110,15 @@ private:
BaseLayerHandleT Handle;
};
- template <typename ModuleSetT>
+ template <typename ModuleSetT, typename MemoryManagerPtrT,
+ typename SymbolResolverPtrT>
class EmissionDeferredSetImpl : public EmissionDeferredSet {
public:
EmissionDeferredSetImpl(ModuleSetT Ms,
- std::unique_ptr<RTDyldMemoryManager> MM)
- : Ms(std::move(Ms)), MM(std::move(MM)) {}
+ MemoryManagerPtrT MemMgr,
+ SymbolResolverPtrT Resolver)
+ : Ms(std::move(Ms)), MemMgr(std::move(MemMgr)),
+ Resolver(std::move(Resolver)) {}
protected:
@@ -145,7 +149,8 @@ private:
// We don't need the mangled names set any more: Once we've emitted this
// to the base layer we'll just look for symbols there.
MangledSymbols.reset();
- return BaseLayer.addModuleSet(std::move(Ms), std::move(MM));
+ return BaseLayer.addModuleSet(std::move(Ms), std::move(MemMgr),
+ std::move(Resolver));
}
private:
@@ -206,7 +211,8 @@ private:
}
ModuleSetT Ms;
- std::unique_ptr<RTDyldMemoryManager> MM;
+ MemoryManagerPtrT MemMgr;
+ SymbolResolverPtrT Resolver;
mutable std::unique_ptr<StringMap<const GlobalValue*>> MangledSymbols;
};
@@ -223,12 +229,15 @@ public:
LazyEmittingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {}
/// @brief Add the given set of modules to the lazy emitting layer.
- template <typename ModuleSetT>
+ template <typename ModuleSetT, typename MemoryManagerPtrT,
+ typename SymbolResolverPtrT>
ModuleSetHandleT addModuleSet(ModuleSetT Ms,
- std::unique_ptr<RTDyldMemoryManager> MM) {
+ MemoryManagerPtrT MemMgr,
+ SymbolResolverPtrT Resolver) {
return ModuleSetList.insert(
ModuleSetList.end(),
- EmissionDeferredSet::create(BaseLayer, std::move(Ms), std::move(MM)));
+ EmissionDeferredSet::create(BaseLayer, std::move(Ms), std::move(MemMgr),
+ std::move(Resolver)));
}
/// @brief Remove the module set represented by the given handle.
@@ -277,12 +286,16 @@ public:
};
template <typename BaseLayerT>
-template <typename ModuleSetT>
+template <typename ModuleSetT, typename MemoryManagerPtrT,
+ typename SymbolResolverPtrT>
std::unique_ptr<typename LazyEmittingLayer<BaseLayerT>::EmissionDeferredSet>
LazyEmittingLayer<BaseLayerT>::EmissionDeferredSet::create(
- BaseLayerT &B, ModuleSetT Ms, std::unique_ptr<RTDyldMemoryManager> MM) {
- return llvm::make_unique<EmissionDeferredSetImpl<ModuleSetT>>(std::move(Ms),
- std::move(MM));
+ BaseLayerT &B, ModuleSetT Ms, MemoryManagerPtrT MemMgr,
+ SymbolResolverPtrT Resolver) {
+ typedef EmissionDeferredSetImpl<ModuleSetT, MemoryManagerPtrT, SymbolResolverPtrT>
+ EDS;
+ return llvm::make_unique<EDS>(std::move(Ms), std::move(MemMgr),
+ std::move(Resolver));
}
} // End namespace orc.
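A sketch of the revised addModuleSet signature (assumed names: CompileLayer as the layer below, singletonSet as a trivial one-module set helper, Resolver a RuntimeDyld::SymbolResolver built elsewhere): each module set now carries its own memory manager and symbol resolver rather than a single RTDyldMemoryManager.

  LazyEmittingLayer<decltype(CompileLayer)> LazyLayer(CompileLayer);
  auto H = LazyLayer.addModuleSet(singletonSet(std::move(M)),
                                  llvm::make_unique<SectionMemoryManager>(),
                                  std::move(Resolver));
  LazyLayer.emitAndFinalize(H);  // force emission through the base layer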
diff --git a/include/llvm/ExecutionEngine/Orc/LookasideRTDyldMM.h b/include/llvm/ExecutionEngine/Orc/LookasideRTDyldMM.h
deleted file mode 100644
index 4456404..0000000
--- a/include/llvm/ExecutionEngine/Orc/LookasideRTDyldMM.h
+++ /dev/null
@@ -1,92 +0,0 @@
-//===- LookasideRTDyldMM - Redirect symbol lookup via a functor -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Defines an adapter for RuntimeDyldMM that allows lookups for external
-// symbols to go via a functor.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_LOOKASIDERTDYLDMM_H
-#define LLVM_EXECUTIONENGINE_ORC_LOOKASIDERTDYLDMM_H
-
-#include "llvm/ADT/STLExtras.h"
-#include <memory>
-#include <vector>
-
-namespace llvm {
-namespace orc {
-
-/// @brief Defines an adapter for RuntimeDyldMM that allows lookups for external
-/// symbols to go via a functor, before falling back to the lookup logic
-/// provided by the underlying RuntimeDyldMM instance.
-///
-/// This class is useful for redirecting symbol lookup back to various layers
-/// of a JIT component stack, e.g. to enable lazy module emission.
-///
-template <typename BaseRTDyldMM, typename ExternalLookupFtor,
- typename DylibLookupFtor>
-class LookasideRTDyldMM : public BaseRTDyldMM {
-public:
- /// @brief Create a LookasideRTDyldMM intance.
- LookasideRTDyldMM(ExternalLookupFtor ExternalLookup,
- DylibLookupFtor DylibLookup)
- : ExternalLookup(std::move(ExternalLookup)),
- DylibLookup(std::move(DylibLookup)) {}
-
- /// @brief Look up the given symbol address, first via the functor this
- /// instance was created with, then (if the symbol isn't found)
- /// via the underlying RuntimeDyldMM.
- uint64_t getSymbolAddress(const std::string &Name) override {
- if (uint64_t Addr = ExternalLookup(Name))
- return Addr;
- return BaseRTDyldMM::getSymbolAddress(Name);
- }
-
- uint64_t getSymbolAddressInLogicalDylib(const std::string &Name) override {
- if (uint64_t Addr = DylibLookup(Name))
- return Addr;
- return BaseRTDyldMM::getSymbolAddressInLogicalDylib(Name);
- };
-
- /// @brief Get a reference to the ExternalLookup functor.
- ExternalLookupFtor &getExternalLookup() { return ExternalLookup; }
-
- /// @brief Get a const-reference to the ExternalLookup functor.
- const ExternalLookupFtor &getExternalLookup() const { return ExternalLookup; }
-
- /// @brief Get a reference to the DylibLookup functor.
- DylibLookupFtor &getDylibLookup() { return DylibLookup; }
-
- /// @brief Get a const-reference to the DylibLookup functor.
- const DylibLookupFtor &getDylibLookup() const { return DylibLookup; }
-
-private:
- ExternalLookupFtor ExternalLookup;
- DylibLookupFtor DylibLookup;
-};
-
-/// @brief Create a LookasideRTDyldMM from a base memory manager type, an
-/// external lookup functor, and a dylib lookup functor.
-template <typename BaseRTDyldMM, typename ExternalLookupFtor,
- typename DylibLookupFtor>
-std::unique_ptr<
- LookasideRTDyldMM<BaseRTDyldMM, ExternalLookupFtor, DylibLookupFtor>>
-createLookasideRTDyldMM(ExternalLookupFtor &&ExternalLookup,
- DylibLookupFtor &&DylibLookup) {
- typedef LookasideRTDyldMM<BaseRTDyldMM, ExternalLookupFtor, DylibLookupFtor>
- ThisLookasideMM;
- return llvm::make_unique<ThisLookasideMM>(
- std::forward<ExternalLookupFtor>(ExternalLookup),
- std::forward<DylibLookupFtor>(DylibLookup));
-}
-
-} // End namespace orc.
-} // End namespace llvm.
-
-#endif // LLVM_EXECUTIONENGINE_ORC_LOOKASIDERTDYLDMM_H
diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
index 9838991..f3094da 100644
--- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
@@ -15,7 +15,7 @@
#define LLVM_EXECUTIONENGINE_ORC_OBJECTLINKINGLAYER_H
#include "JITSymbol.h"
-#include "LookasideRTDyldMM.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include <list>
@@ -38,13 +38,12 @@ protected:
LinkedObjectSet(const LinkedObjectSet&) = delete;
void operator=(const LinkedObjectSet&) = delete;
public:
- LinkedObjectSet(std::unique_ptr<RTDyldMemoryManager> MM)
- : MM(std::move(MM)), RTDyld(llvm::make_unique<RuntimeDyld>(&*this->MM)),
+ LinkedObjectSet(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RTDyld(llvm::make_unique<RuntimeDyld>(MemMgr, Resolver)),
State(Raw) {}
- // MSVC 2012 cannot infer a move constructor, so write it out longhand.
- LinkedObjectSet(LinkedObjectSet &&O)
- : MM(std::move(O.MM)), RTDyld(std::move(O.RTDyld)), State(O.State) {}
+ virtual ~LinkedObjectSet() {}
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
addObject(const object::ObjectFile &Obj) {
@@ -57,14 +56,7 @@ protected:
bool NeedsFinalization() const { return (State == Raw); }
- void Finalize() {
- State = Finalizing;
- RTDyld->resolveRelocations();
- RTDyld->registerEHFrames();
- MM->finalizeMemory();
- OwnedBuffers.clear();
- State = Finalized;
- }
+ virtual void Finalize() = 0;
void mapSectionAddress(const void *LocalAddress, TargetAddress TargetAddr) {
assert((State != Finalized) &&
@@ -76,8 +68,7 @@ protected:
OwnedBuffers.push_back(std::move(B));
}
- private:
- std::unique_ptr<RTDyldMemoryManager> MM;
+ protected:
std::unique_ptr<RuntimeDyld> RTDyld;
enum { Raw, Finalizing, Finalized } State;
@@ -87,7 +78,7 @@ protected:
std::vector<std::unique_ptr<MemoryBuffer>> OwnedBuffers;
};
- typedef std::list<LinkedObjectSet> LinkedObjectSetListT;
+ typedef std::list<std::unique_ptr<LinkedObjectSet>> LinkedObjectSetListT;
public:
/// @brief Handle to a set of loaded objects.
@@ -99,7 +90,7 @@ public:
template <typename OwningMBSet>
void takeOwnershipOfBuffers(ObjSetHandleT H, OwningMBSet MBs) {
for (auto &MB : MBs)
- H->takeOwnershipOfBuffer(std::move(MB));
+ (*H)->takeOwnershipOfBuffer(std::move(MB));
}
};
@@ -120,6 +111,37 @@ public:
/// symbols.
template <typename NotifyLoadedFtor = DoNothingOnNotifyLoaded>
class ObjectLinkingLayer : public ObjectLinkingLayerBase {
+private:
+
+ template <typename MemoryManagerPtrT, typename SymbolResolverPtrT>
+ class ConcreteLinkedObjectSet : public LinkedObjectSet {
+ public:
+ ConcreteLinkedObjectSet(MemoryManagerPtrT MemMgr,
+ SymbolResolverPtrT Resolver)
+ : LinkedObjectSet(*MemMgr, *Resolver), MemMgr(std::move(MemMgr)),
+ Resolver(std::move(Resolver)) { }
+
+ void Finalize() override {
+ State = Finalizing;
+ RTDyld->resolveRelocations();
+ RTDyld->registerEHFrames();
+ MemMgr->finalizeMemory();
+ OwnedBuffers.clear();
+ State = Finalized;
+ }
+
+ private:
+ MemoryManagerPtrT MemMgr;
+ SymbolResolverPtrT Resolver;
+ };
+
+ template <typename MemoryManagerPtrT, typename SymbolResolverPtrT>
+ std::unique_ptr<LinkedObjectSet>
+ createLinkedObjectSet(MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver) {
+ typedef ConcreteLinkedObjectSet<MemoryManagerPtrT, SymbolResolverPtrT> LOS;
+ return llvm::make_unique<LOS>(std::move(MemMgr), std::move(Resolver));
+ }
+
public:
/// @brief LoadedObjectInfo list. Contains a list of owning pointers to
@@ -127,21 +149,16 @@ public:
typedef std::vector<std::unique_ptr<RuntimeDyld::LoadedObjectInfo>>
LoadedObjInfoList;
- /// @brief Functor to create RTDyldMemoryManager instances.
- typedef std::function<std::unique_ptr<RTDyldMemoryManager>()> CreateRTDyldMMFtor;
-
/// @brief Functor for receiving finalization notifications.
typedef std::function<void(ObjSetHandleT)> NotifyFinalizedFtor;
/// @brief Construct an ObjectLinkingLayer with the given NotifyLoaded,
- /// NotifyFinalized and CreateMemoryManager functors.
+ /// and NotifyFinalized functors.
ObjectLinkingLayer(
- CreateRTDyldMMFtor CreateMemoryManager = CreateRTDyldMMFtor(),
NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor())
: NotifyLoaded(std::move(NotifyLoaded)),
- NotifyFinalized(std::move(NotifyFinalized)),
- CreateMemoryManager(std::move(CreateMemoryManager)) {}
+ NotifyFinalized(std::move(NotifyFinalized)) {}
/// @brief Add a set of objects (or archives) that will be treated as a unit
/// for the purposes of symbol lookup and memory management.
@@ -154,19 +171,18 @@ public:
/// This version of this method allows the client to pass in an
/// RTDyldMemoryManager instance that will be used to allocate memory and look
/// up external symbol addresses for the given objects.
- template <typename ObjSetT>
+ template <typename ObjSetT,
+ typename MemoryManagerPtrT,
+ typename SymbolResolverPtrT>
ObjSetHandleT addObjectSet(const ObjSetT &Objects,
- std::unique_ptr<RTDyldMemoryManager> MM) {
-
- if (!MM) {
- assert(CreateMemoryManager &&
- "No memory manager or memory manager creator provided.");
- MM = CreateMemoryManager();
- }
-
- ObjSetHandleT Handle = LinkedObjSetList.insert(
- LinkedObjSetList.end(), LinkedObjectSet(std::move(MM)));
- LinkedObjectSet &LOS = *Handle;
+ MemoryManagerPtrT MemMgr,
+ SymbolResolverPtrT Resolver) {
+ ObjSetHandleT Handle =
+ LinkedObjSetList.insert(
+ LinkedObjSetList.end(),
+ createLinkedObjectSet(std::move(MemMgr), std::move(Resolver)));
+
+ LinkedObjectSet &LOS = **Handle;
LoadedObjInfoList LoadedObjInfos;
for (auto &Obj : Objects)
@@ -212,11 +228,11 @@ public:
/// given object set.
JITSymbol findSymbolIn(ObjSetHandleT H, StringRef Name,
bool ExportedSymbolsOnly) {
- if (auto Sym = H->getSymbol(Name)) {
+ if (auto Sym = (*H)->getSymbol(Name)) {
if (Sym.isExported() || !ExportedSymbolsOnly) {
auto Addr = Sym.getAddress();
auto Flags = Sym.getFlags();
- if (!H->NeedsFinalization()) {
+ if (!(*H)->NeedsFinalization()) {
// If this instance has already been finalized then we can just return
// the address.
return JITSymbol(Addr, Flags);
@@ -225,10 +241,10 @@ public:
// it. The functor still needs to double-check whether finalization is
// required, in case someone else finalizes this set before the
// functor is called.
- auto GetAddress =
+ auto GetAddress =
[this, Addr, H]() {
- if (H->NeedsFinalization()) {
- H->Finalize();
+ if ((*H)->NeedsFinalization()) {
+ (*H)->Finalize();
if (NotifyFinalized)
NotifyFinalized(H);
}
@@ -244,14 +260,14 @@ public:
/// @brief Map section addresses for the objects associated with the handle H.
void mapSectionAddress(ObjSetHandleT H, const void *LocalAddress,
TargetAddress TargetAddr) {
- H->mapSectionAddress(LocalAddress, TargetAddr);
+ (*H)->mapSectionAddress(LocalAddress, TargetAddr);
}
/// @brief Immediately emit and finalize the object set represented by the
/// given handle.
/// @param H Handle for object set to emit/finalize.
void emitAndFinalize(ObjSetHandleT H) {
- H->Finalize();
+ (*H)->Finalize();
if (NotifyFinalized)
NotifyFinalized(H);
}
@@ -260,7 +276,6 @@ private:
LinkedObjectSetListT LinkedObjSetList;
NotifyLoadedFtor NotifyLoaded;
NotifyFinalizedFtor NotifyFinalized;
- CreateRTDyldMMFtor CreateMemoryManager;
};
} // End namespace orc.
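A sketch of the revised ObjectLinkingLayer usage (Objs and Resolver assumed to exist): the CreateRTDyldMMFtor constructor parameter is gone, and each addObjectSet call now passes its own memory manager and symbol resolver.

  ObjectLinkingLayer<> ObjLayer;
  auto H = ObjLayer.addObjectSet(Objs,
                                 llvm::make_unique<SectionMemoryManager>(),
                                 std::move(Resolver));
  // Lazy finalization: the returned symbol finalizes the set on first address query.
  auto MainSym = ObjLayer.findSymbolIn(H, "main", /*ExportedSymbolsOnly=*/true);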
diff --git a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
index 792a499..207bad0 100644
--- a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
+++ b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
@@ -14,6 +14,7 @@
#ifndef LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H
#define LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H
+#include "RuntimeDyld.h"
#include "llvm-c/ExecutionEngine.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CBindingWrapping.h"
@@ -27,89 +28,91 @@ class ExecutionEngine;
class ObjectFile;
}
+class MCJITMemoryManager : public RuntimeDyld::MemoryManager {
+public:
+ /// This method is called after an object has been loaded into memory but
+ /// before relocations are applied to the loaded sections. The object load
+ /// may have been initiated by MCJIT to resolve an external symbol for another
+ /// object that is being finalized. In that case, the object about which
+ /// the memory manager is being notified will be finalized immediately after
+ /// the memory manager returns from this call.
+ ///
+ /// Memory managers which are preparing code for execution in an external
+ /// address space can use this call to remap the section addresses for the
+ /// newly loaded object.
+ virtual void notifyObjectLoaded(ExecutionEngine *EE,
+ const object::ObjectFile &) {}
+};
+
// RuntimeDyld clients often want to handle the memory management of
// what gets placed where. For JIT clients, this is the subset of
// JITMemoryManager required for dynamic loading of binaries.
//
// FIXME: As the RuntimeDyld fills out, additional routines will be needed
// for the varying types of objects to be allocated.
-class RTDyldMemoryManager {
+class RTDyldMemoryManager : public MCJITMemoryManager,
+ public RuntimeDyld::SymbolResolver {
RTDyldMemoryManager(const RTDyldMemoryManager&) = delete;
void operator=(const RTDyldMemoryManager&) = delete;
public:
RTDyldMemoryManager() {}
- virtual ~RTDyldMemoryManager();
-
- /// Allocate a memory block of (at least) the given size suitable for
- /// executable code. The SectionID is a unique identifier assigned by the JIT
- /// engine, and optionally recorded by the memory manager to access a loaded
- /// section.
- virtual uint8_t *allocateCodeSection(
- uintptr_t Size, unsigned Alignment, unsigned SectionID,
- StringRef SectionName) = 0;
-
- /// Allocate a memory block of (at least) the given size suitable for data.
- /// The SectionID is a unique identifier assigned by the JIT engine, and
- /// optionally recorded by the memory manager to access a loaded section.
- virtual uint8_t *allocateDataSection(
- uintptr_t Size, unsigned Alignment, unsigned SectionID,
- StringRef SectionName, bool IsReadOnly) = 0;
-
- /// Inform the memory manager about the total amount of memory required to
- /// allocate all sections to be loaded:
- /// \p CodeSize - the total size of all code sections
- /// \p DataSizeRO - the total size of all read-only data sections
- /// \p DataSizeRW - the total size of all read-write data sections
- ///
- /// Note that by default the callback is disabled. To enable it
- /// redefine the method needsToReserveAllocationSpace to return true.
- virtual void reserveAllocationSpace(
- uintptr_t CodeSize, uintptr_t DataSizeRO, uintptr_t DataSizeRW) { }
-
- /// Override to return true to enable the reserveAllocationSpace callback.
- virtual bool needsToReserveAllocationSpace() { return false; }
-
- /// Register the EH frames with the runtime so that c++ exceptions work.
- ///
- /// \p Addr parameter provides the local address of the EH frame section
- /// data, while \p LoadAddr provides the address of the data in the target
- /// address space. If the section has not been remapped (which will usually
- /// be the case for local execution) these two values will be the same.
- virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size);
+ ~RTDyldMemoryManager() override;
- virtual void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size);
+ void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override;
+ void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override;
/// This method returns the address of the specified function or variable in
/// the current process.
static uint64_t getSymbolAddressInProcess(const std::string &Name);
+ /// Legacy symbol lookup - DEPRECATED! Please override findSymbol instead.
+ ///
/// This method returns the address of the specified function or variable.
/// It is used to resolve symbols during module linking.
virtual uint64_t getSymbolAddress(const std::string &Name) {
return getSymbolAddressInProcess(Name);
}
- /// This method returns the address of the specified symbol if it exists
- /// within the logical dynamic library represented by this
- /// RTDyldMemoryManager. Unlike getSymbolAddress, queries through this
- /// interface should return addresses for hidden symbols.
+ /// This method returns a RuntimeDyld::SymbolInfo for the specified function
+ /// or variable. It is used to resolve symbols during module linking.
+ ///
+ /// By default this falls back on the legacy lookup method:
+ /// 'getSymbolAddress'. The address returned by getSymbolAddress is treated as
+ /// a strong, exported symbol, consistent with historical treatment by
+ /// RuntimeDyld.
///
- /// This is of particular importance for the Orc JIT APIs, which support lazy
- /// compilation by breaking up modules: Each of those broken out modules
- /// must be able to resolve hidden symbols provided by the others. Clients
- /// writing memory managers for MCJIT can usually ignore this method.
+ /// Clients writing custom RTDyldMemoryManagers are encouraged to override
+ /// this method and return a SymbolInfo with the flags set correctly. This is
+ /// necessary for RuntimeDyld to correctly handle weak and non-exported symbols.
+ RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override {
+ return RuntimeDyld::SymbolInfo(getSymbolAddress(Name),
+ JITSymbolFlags::Exported);
+ }
+
+ /// Legacy symbol lookup -- DEPRECATED! Please override
+ /// findSymbolInLogicalDylib instead.
///
- /// This method will be queried by RuntimeDyld when checking for previous
- /// definitions of common symbols. It will *not* be queried by default when
- /// resolving external symbols (this minimises the link-time overhead for
- /// MCJIT clients who don't care about Orc features). If you are writing a
- /// RTDyldMemoryManager for Orc and want "external" symbol resolution to
- /// search the logical dylib, you should override your getSymbolAddress
- /// method call this method directly.
+ /// Default to treating all modules as separate.
virtual uint64_t getSymbolAddressInLogicalDylib(const std::string &Name) {
return 0;
}
+ /// Default to treating all modules as separate.
+ ///
+ /// By default this falls back on the legacy lookup method:
+ /// 'getSymbolAddressInLogicalDylib'. The address returned by
+ /// getSymbolAddressInLogicalDylib is treated as a strong, exported symbol,
+ /// consistent with historical treatment by RuntimeDyld.
+ ///
+ /// Clients writing custom RTDyldMemoryManagers are encouraged to override
+ /// this method and return a SymbolInfo with the flags set correctly. This is
+ /// necessary for RuntimeDyld to correctly handle weak and non-exported symbols.
+ RuntimeDyld::SymbolInfo
+ findSymbolInLogicalDylib(const std::string &Name) override {
+ return RuntimeDyld::SymbolInfo(getSymbolAddressInLogicalDylib(Name),
+ JITSymbolFlags::Exported);
+ }
+
/// This method returns the address of the specified function. As such it is
/// only useful for resolving library symbols, not code generated symbols.
///
@@ -121,30 +124,6 @@ public:
/// MCJIT or RuntimeDyld. Use getSymbolAddress instead.
virtual void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true);
-
- /// This method is called after an object has been loaded into memory but
- /// before relocations are applied to the loaded sections. The object load
- /// may have been initiated by MCJIT to resolve an external symbol for another
- /// object that is being finalized. In that case, the object about which
- /// the memory manager is being notified will be finalized immediately after
- /// the memory manager returns from this call.
- ///
- /// Memory managers which are preparing code for execution in an external
- /// address space can use this call to remap the section addresses for the
- /// newly loaded object.
- virtual void notifyObjectLoaded(ExecutionEngine *EE,
- const object::ObjectFile &) {}
-
- /// This method is called when object loading is complete and section page
- /// permissions can be applied. It is up to the memory manager implementation
- /// to decide whether or not to act on this method. The memory manager will
- /// typically allocate all sections as read-write and then apply specific
- /// permissions when this method is called. Code sections cannot be executed
- /// until this function has been called. In addition, any cache coherency
- /// operations needed to reliably use the memory are also performed.
- ///
- /// Returns true if an error occurred, false otherwise.
- virtual bool finalizeMemory(std::string *ErrMsg = nullptr) = 0;
};
// Create wrappers for C Binding types (see CBindingWrapping.h).
@@ -153,4 +132,5 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(
} // namespace llvm
+
#endif
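A hypothetical subclass illustrating the advice in the comments above: override the new findSymbol (rather than the deprecated getSymbolAddress) so that weak or non-exported symbols carry the right flags. lookupInMyTable is an assumed helper, not an LLVM API.

  class MyMemoryManager : public SectionMemoryManager {
  public:
    RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override {
      if (uint64_t Addr = lookupInMyTable(Name))
        return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Weak);
      // Fall back to the default (strong, exported, in-process) behaviour.
      return SectionMemoryManager::findSymbol(Name);
    }
  private:
    uint64_t lookupInMyTable(const std::string &Name); // assumed, defined elsewhere
  };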
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index fe0ccda..5723f05 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -16,7 +16,6 @@
#include "JITSymbolFlags.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/Support/Memory.h"
#include <memory>
@@ -29,19 +28,13 @@ namespace object {
class RuntimeDyldImpl;
class RuntimeDyldCheckerImpl;
-
+
class RuntimeDyld {
friend class RuntimeDyldCheckerImpl;
RuntimeDyld(const RuntimeDyld &) = delete;
void operator=(const RuntimeDyld &) = delete;
- // RuntimeDyldImpl is the actual class. RuntimeDyld is just the public
- // interface.
- std::unique_ptr<RuntimeDyldImpl> Dyld;
- RTDyldMemoryManager *MM;
- bool ProcessAllSections;
- RuntimeDyldCheckerImpl *Checker;
protected:
// Change the address associated with a section when resolving relocations.
// Any relocations already associated with the symbol will be re-resolved.
@@ -82,7 +75,101 @@ public:
unsigned BeginIdx, EndIdx;
};
- RuntimeDyld(RTDyldMemoryManager *);
+ /// \brief Memory Management.
+ class MemoryManager {
+ public:
+ virtual ~MemoryManager() {};
+
+ /// Allocate a memory block of (at least) the given size suitable for
+ /// executable code. The SectionID is a unique identifier assigned by the
+ /// RuntimeDyld instance, and optionally recorded by the memory manager to
+ /// access a loaded section.
+ virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName) = 0;
+
+ /// Allocate a memory block of (at least) the given size suitable for data.
+ /// The SectionID is a unique identifier assigned by the JIT engine, and
+ /// optionally recorded by the memory manager to access a loaded section.
+ virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName,
+ bool IsReadOnly) = 0;
+
+ /// Inform the memory manager about the total amount of memory required to
+ /// allocate all sections to be loaded:
+ /// \p CodeSize - the total size of all code sections
+ /// \p DataSizeRO - the total size of all read-only data sections
+ /// \p DataSizeRW - the total size of all read-write data sections
+ ///
+ /// Note that by default the callback is disabled. To enable it
+ /// redefine the method needsToReserveAllocationSpace to return true.
+ virtual void reserveAllocationSpace(uintptr_t CodeSize,
+ uintptr_t DataSizeRO,
+ uintptr_t DataSizeRW) {}
+
+ /// Override to return true to enable the reserveAllocationSpace callback.
+ virtual bool needsToReserveAllocationSpace() { return false; }
+
+ /// Register the EH frames with the runtime so that c++ exceptions work.
+ ///
+ /// \p Addr parameter provides the local address of the EH frame section
+ /// data, while \p LoadAddr provides the address of the data in the target
+ /// address space. If the section has not been remapped (which will usually
+ /// be the case for local execution) these two values will be the same.
+ virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+ size_t Size) = 0;
+ virtual void deregisterEHFrames(uint8_t *addr, uint64_t LoadAddr,
+ size_t Size) = 0;
+
+ /// This method is called when object loading is complete and section page
+ /// permissions can be applied. It is up to the memory manager implementation
+ /// to decide whether or not to act on this method. The memory manager will
+ /// typically allocate all sections as read-write and then apply specific
+ /// permissions when this method is called. Code sections cannot be executed
+ /// until this function has been called. In addition, any cache coherency
+ /// operations needed to reliably use the memory are also performed.
+ ///
+ /// Returns true if an error occurred, false otherwise.
+ virtual bool finalizeMemory(std::string *ErrMsg = nullptr) = 0;
+
+ private:
+ virtual void anchor();
+ };
+
+ /// \brief Symbol resolution.
+ class SymbolResolver {
+ public:
+ virtual ~SymbolResolver() {};
+
+ /// This method returns the address of the specified function or variable.
+ /// It is used to resolve symbols during module linking.
+ virtual SymbolInfo findSymbol(const std::string &Name) = 0;
+
+ /// This method returns the address of the specified symbol if it exists
+ /// within the logical dynamic library represented by this
+ /// RTDyldMemoryManager. Unlike getSymbolAddress, queries through this
+ /// interface should return addresses for hidden symbols.
+ ///
+ /// This is of particular importance for the Orc JIT APIs, which support lazy
+ /// compilation by breaking up modules: Each of those broken out modules
+ /// must be able to resolve hidden symbols provided by the others. Clients
+ /// writing memory managers for MCJIT can usually ignore this method.
+ ///
+ /// This method will be queried by RuntimeDyld when checking for previous
+ /// definitions of common symbols. It will *not* be queried by default when
+ /// resolving external symbols (this minimises the link-time overhead for
+ /// MCJIT clients who don't care about Orc features). If you are writing a
+ /// RTDyldMemoryManager for Orc and want "external" symbol resolution to
+ /// search the logical dylib, you should override your getSymbolAddress
+ /// method to call this method directly.
+ virtual SymbolInfo findSymbolInLogicalDylib(const std::string &Name) = 0;
+ private:
+ virtual void anchor();
+ };
+
+ /// \brief Construct a RuntimeDyld instance.
+ RuntimeDyld(MemoryManager &MemMgr, SymbolResolver &Resolver);
~RuntimeDyld();
/// Add the referenced object file to the list of objects to be loaded and
@@ -131,6 +218,15 @@ public:
assert(!Dyld && "setProcessAllSections must be called before loadObject.");
this->ProcessAllSections = ProcessAllSections;
}
+
+private:
+ // RuntimeDyldImpl is the actual class. RuntimeDyld is just the public
+ // interface.
+ std::unique_ptr<RuntimeDyldImpl> Dyld;
+ MemoryManager &MemMgr;
+ SymbolResolver &Resolver;
+ bool ProcessAllSections;
+ RuntimeDyldCheckerImpl *Checker;
};
} // end namespace llvm
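A minimal direct-use sketch of the new two-argument constructor (Obj assumed to be an object::ObjectFile): RuntimeDyld now borrows, rather than owns, its memory manager and resolver, and an RTDyldMemoryManager such as SectionMemoryManager can serve as both.

  SectionMemoryManager MemMgr;          // is both a MemoryManager and a SymbolResolver
  RuntimeDyld RTDyld(MemMgr, MemMgr);   // MemMgr must outlive RTDyld
  auto Info = RTDyld.loadObject(Obj);
  RTDyld.resolveRelocations();
  RTDyld.registerEHFrames();
  MemMgr.finalizeMemory();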
diff --git a/include/llvm/ExecutionEngine/SectionMemoryManager.h b/include/llvm/ExecutionEngine/SectionMemoryManager.h
index b825aff..0b0dcb0 100644
--- a/include/llvm/ExecutionEngine/SectionMemoryManager.h
+++ b/include/llvm/ExecutionEngine/SectionMemoryManager.h
@@ -16,7 +16,7 @@
#define LLVM_EXECUTIONENGINE_SECTIONMEMORYMANAGER_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Memory.h"
@@ -40,7 +40,7 @@ class SectionMemoryManager : public RTDyldMemoryManager {
public:
SectionMemoryManager() { }
- virtual ~SectionMemoryManager();
+ ~SectionMemoryManager() override;
/// \brief Allocates a memory block of (at least) the given size suitable for
/// executable code.
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h
index 443892b..d0d1b5e 100644
--- a/include/llvm/IR/Attributes.h
+++ b/include/llvm/IR/Attributes.h
@@ -89,6 +89,7 @@ public:
///< often, so lazy binding isn't worthwhile
NonNull, ///< Pointer is known to be not null
Dereferenceable, ///< Pointer is known to be dereferenceable
+ DereferenceableOrNull, ///< Pointer is either null or dereferenceable
NoRedZone, ///< Disable redzone
NoReturn, ///< Mark the function as not returning
NoUnwind, ///< Function doesn't unwind stack
@@ -136,6 +137,8 @@ public:
static Attribute getWithStackAlignment(LLVMContext &Context, uint64_t Align);
static Attribute getWithDereferenceableBytes(LLVMContext &Context,
uint64_t Bytes);
+ static Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context,
+ uint64_t Bytes);
//===--------------------------------------------------------------------===//
// Attribute Accessors
@@ -185,6 +188,10 @@ public:
/// dereferenceable attribute (or zero if unknown).
uint64_t getDereferenceableBytes() const;
+ /// \brief Returns the number of dereferenceable_or_null bytes from the
+ /// dereferenceable_or_null attribute (or zero if unknown).
+ uint64_t getDereferenceableOrNullBytes() const;
+
/// \brief The Attribute is converted to a string of equivalent mnemonic. This
/// is, presumably, for writing out the mnemonics for the assembly writer.
std::string getAsString(bool InAttrGrp = false) const;
@@ -287,6 +294,12 @@ public:
AttributeSet addDereferenceableAttr(LLVMContext &C, unsigned Index,
uint64_t Bytes) const;
+ /// \brief Add the dereferenceable_or_null attribute to the attribute set at
+ /// the given index. Since attribute sets are immutable, this returns a new
+ /// set.
+ AttributeSet addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index,
+ uint64_t Bytes) const;
+
//===--------------------------------------------------------------------===//
// AttributeSet Accessors
//===--------------------------------------------------------------------===//
@@ -331,6 +344,10 @@ public:
/// \brief Get the number of dereferenceable bytes (or zero if unknown).
uint64_t getDereferenceableBytes(unsigned Index) const;
+ /// \brief Get the number of dereferenceable_or_null bytes (or zero if
+ /// unknown).
+ uint64_t getDereferenceableOrNullBytes(unsigned Index) const;
+
/// \brief Return the attributes at the index as a string.
std::string getAsString(unsigned Index, bool InAttrGrp = false) const;
@@ -411,6 +428,7 @@ class AttrBuilder {
uint64_t Alignment;
uint64_t StackAlignment;
uint64_t DerefBytes;
+ uint64_t DerefOrNullBytes;
public:
AttrBuilder() : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0) {}
explicit AttrBuilder(uint64_t Val)
@@ -476,6 +494,10 @@ public:
/// attribute exists (zero is returned otherwise).
uint64_t getDereferenceableBytes() const { return DerefBytes; }
+ /// \brief Retrieve the number of dereferenceable_or_null bytes, if the
+ /// dereferenceable_or_null attribute exists (zero is returned otherwise).
+ uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; }
+
/// \brief This turns an int alignment (which must be a power of 2) into the
/// form used internally in Attribute.
AttrBuilder &addAlignmentAttr(unsigned Align);
@@ -488,6 +510,10 @@ public:
/// internally in Attribute.
AttrBuilder &addDereferenceableAttr(uint64_t Bytes);
+ /// \brief This turns the number of dereferenceable_or_null bytes into the
+ /// form used internally in Attribute.
+ AttrBuilder &addDereferenceableOrNullAttr(uint64_t Bytes);
+
/// \brief Return true if the builder contains no target-independent
/// attributes.
bool empty() const { return Attrs.none(); }
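A short sketch of attaching the new attribute through the AttributeSet API declared above (F assumed to be a Function*; index 1 denotes the first parameter, index 0 the return value):

  // Mark F's first argument as dereferenceable_or_null(8).
  AttributeSet AS = F->getAttributes();
  AS = AS.addDereferenceableOrNullAttr(F->getContext(), 1, 8);
  F->setAttributes(AS);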
diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h
index 8f5cdeb..a71946e 100644
--- a/include/llvm/IR/BasicBlock.h
+++ b/include/llvm/IR/BasicBlock.h
@@ -28,33 +28,25 @@ class LandingPadInst;
class TerminatorInst;
class LLVMContext;
class BlockAddress;
+class Function;
-template<> struct ilist_traits<Instruction>
- : public SymbolTableListTraits<Instruction, BasicBlock> {
+// Traits for intrusive list of basic blocks...
+template<> struct ilist_traits<BasicBlock>
+ : public SymbolTableListTraits<BasicBlock, Function> {
- /// \brief Return a node that marks the end of a list.
- ///
- /// The sentinel is relative to this instance, so we use a non-static
- /// method.
- Instruction *createSentinel() const {
- // Since i(p)lists always publicly derive from their corresponding traits,
- // placing a data member in this class will augment the i(p)list. But since
- // the NodeTy is expected to be publicly derive from ilist_node<NodeTy>,
- // there is a legal viable downcast from it to NodeTy. We use this trick to
- // superimpose an i(p)list with a "ghostly" NodeTy, which becomes the
- // sentinel. Dereferencing the sentinel is forbidden (save the
- // ilist_node<NodeTy>), so no one will ever notice the superposition.
- return static_cast<Instruction*>(&Sentinel);
- }
- static void destroySentinel(Instruction*) {}
+ BasicBlock *createSentinel() const;
+ static void destroySentinel(BasicBlock*) {}
+
+ BasicBlock *provideInitialHead() const { return createSentinel(); }
+ BasicBlock *ensureHead(BasicBlock*) const { return createSentinel(); }
+ static void noteHead(BasicBlock*, BasicBlock*) {}
- Instruction *provideInitialHead() const { return createSentinel(); }
- Instruction *ensureHead(Instruction*) const { return createSentinel(); }
- static void noteHead(Instruction*, Instruction*) {}
+ static ValueSymbolTable *getSymTab(Function *ItemParent);
private:
- mutable ilist_half_node<Instruction> Sentinel;
+ mutable ilist_half_node<BasicBlock> Sentinel;
};
+
/// \brief LLVM Basic Block Representation
///
/// This represents a single basic block in LLVM. A basic block is simply a
@@ -113,7 +105,7 @@ public:
BasicBlock *InsertBefore = nullptr) {
return new BasicBlock(Context, Name, Parent, InsertBefore);
}
- ~BasicBlock();
+ ~BasicBlock() override;
/// \brief Return the enclosing method, or null if none.
const Function *getParent() const { return Parent; }
@@ -176,7 +168,9 @@ public:
void removeFromParent();
/// \brief Unlink 'this' from the containing function and delete it.
- void eraseFromParent();
+ ///
+ /// \returns an iterator pointing to the element after the erased one.
+ iplist<BasicBlock>::iterator eraseFromParent();
/// \brief Unlink this basic block from its current function and insert it
/// into the function that \p MovePos lives in, right before \p MovePos.
@@ -332,6 +326,12 @@ private:
}
};
+// createSentinel is used to get hold of the node that marks the end of the
+// list... (same trick used here as in ilist_traits<Instruction>)
+inline BasicBlock *ilist_traits<BasicBlock>::createSentinel() const {
+ return static_cast<BasicBlock*>(&Sentinel);
+}
+
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(BasicBlock, LLVMBasicBlockRef)
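A brief sketch of the new iterator-returning eraseFromParent in an erase-while-iterating loop (isDeadBlock is an assumed predicate, F a Function reference):

  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; )
    if (isDeadBlock(*BB))
      BB = BB->eraseFromParent();  // iterator to the block after the erased one
    else
      ++BB;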
diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h
index 9c87936..ebace33 100644
--- a/include/llvm/IR/CallSite.h
+++ b/include/llvm/IR/CallSite.h
@@ -46,12 +46,13 @@ template <typename FunTy = const Function,
class CallSiteBase {
protected:
PointerIntPair<InstrTy*, 1, bool> I;
-public:
+
CallSiteBase() : I(nullptr, false) {}
CallSiteBase(CallTy *CI) : I(CI, true) { assert(CI); }
CallSiteBase(InvokeTy *II) : I(II, false) { assert(II); }
- CallSiteBase(ValTy *II) { *this = get(II); }
-protected:
+ explicit CallSiteBase(ValTy *II) { *this = get(II); }
+
+private:
/// CallSiteBase::get - This static method is sort of like a constructor. It
/// will create an appropriate call site for a Call or Invoke instruction, but
/// it can also create a null initialized CallSiteBase object for something
@@ -349,15 +350,13 @@ private:
class CallSite : public CallSiteBase<Function, Value, User, Instruction,
CallInst, InvokeInst, User::op_iterator> {
- typedef CallSiteBase<Function, Value, User, Instruction,
- CallInst, InvokeInst, User::op_iterator> Base;
public:
CallSite() {}
- CallSite(Base B) : Base(B) {}
- CallSite(Value* V) : Base(V) {}
- CallSite(CallInst *CI) : Base(CI) {}
- CallSite(InvokeInst *II) : Base(II) {}
- CallSite(Instruction *II) : Base(II) {}
+ CallSite(CallSiteBase B) : CallSiteBase(B) {}
+ CallSite(CallInst *CI) : CallSiteBase(CI) {}
+ CallSite(InvokeInst *II) : CallSiteBase(II) {}
+ explicit CallSite(Instruction *II) : CallSiteBase(II) {}
+ explicit CallSite(Value *V) : CallSiteBase(V) {}
bool operator==(const CallSite &CS) const { return I == CS.I; }
bool operator!=(const CallSite &CS) const { return I != CS.I; }
@@ -371,13 +370,13 @@ private:
/// ImmutableCallSite - establish a view to a call site for examination
class ImmutableCallSite : public CallSiteBase<> {
- typedef CallSiteBase<> Base;
public:
- ImmutableCallSite(const Value* V) : Base(V) {}
- ImmutableCallSite(const CallInst *CI) : Base(CI) {}
- ImmutableCallSite(const InvokeInst *II) : Base(II) {}
- ImmutableCallSite(const Instruction *II) : Base(II) {}
- ImmutableCallSite(CallSite CS) : Base(CS.getInstruction()) {}
+ ImmutableCallSite() {}
+ ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {}
+ ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {}
+ explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {}
+ explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {}
+ ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {}
};
} // End llvm namespace
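A small sketch of the effect of the now-explicit constructors (I assumed to be an Instruction visited by some pass): call sites must be constructed explicitly from a Value or Instruction instead of converting implicitly.

  CallSite CS(&I);               // explicit; copy-initialization from &I no longer compiles
  if (CS.getInstruction()) {
    Function *Callee = CS.getCalledFunction();  // may be null for indirect calls
    (void)Callee;
  }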
diff --git a/include/llvm/IR/ConstantFolder.h b/include/llvm/IR/ConstantFolder.h
index e271a14..fb6ca3b 100644
--- a/include/llvm/IR/ConstantFolder.h
+++ b/include/llvm/IR/ConstantFolder.h
@@ -118,34 +118,35 @@ public:
// Memory Instructions
//===--------------------------------------------------------------------===//
- Constant *CreateGetElementPtr(Constant *C,
+ Constant *CreateGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Constant *> IdxList) const {
- return ConstantExpr::getGetElementPtr(C, IdxList);
+ return ConstantExpr::getGetElementPtr(Ty, C, IdxList);
}
- Constant *CreateGetElementPtr(Constant *C, Constant *Idx) const {
+ Constant *CreateGetElementPtr(Type *Ty, Constant *C, Constant *Idx) const {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
- return ConstantExpr::getGetElementPtr(C, Idx);
+ return ConstantExpr::getGetElementPtr(Ty, C, Idx);
}
- Constant *CreateGetElementPtr(Constant *C,
+ Constant *CreateGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> IdxList) const {
- return ConstantExpr::getGetElementPtr(C, IdxList);
+ return ConstantExpr::getGetElementPtr(Ty, C, IdxList);
}
- Constant *CreateInBoundsGetElementPtr(Constant *C,
+ Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Constant *> IdxList) const {
- return ConstantExpr::getInBoundsGetElementPtr(C, IdxList);
+ return ConstantExpr::getInBoundsGetElementPtr(Ty, C, IdxList);
}
- Constant *CreateInBoundsGetElementPtr(Constant *C, Constant *Idx) const {
+ Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
+ Constant *Idx) const {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
- return ConstantExpr::getInBoundsGetElementPtr(C, Idx);
+ return ConstantExpr::getInBoundsGetElementPtr(Ty, C, Idx);
}
- Constant *CreateInBoundsGetElementPtr(Constant *C,
+ Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> IdxList) const {
- return ConstantExpr::getInBoundsGetElementPtr(C, IdxList);
+ return ConstantExpr::getInBoundsGetElementPtr(Ty, C, IdxList);
}
//===--------------------------------------------------------------------===//
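A small sketch of explicitly-typed constant GEP creation as used by the folder above (GV assumed to be a global Constant*, Ctx its LLVMContext): the source element type is now passed alongside the pointer operand.

  Constant *Zero = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
  Constant *Idxs[] = { Zero, Zero };
  Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
  Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(ElemTy, GV, Idxs);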
diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h
index 59be653..70437e6 100644
--- a/include/llvm/IR/Constants.h
+++ b/include/llvm/IR/Constants.h
@@ -547,7 +547,7 @@ class ConstantDataSequential : public Constant {
protected:
explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data)
: Constant(ty, VT, nullptr, 0), DataElements(Data), Next(nullptr) {}
- ~ConstantDataSequential() { delete Next; }
+ ~ConstantDataSequential() override { delete Next; }
static Constant *getImpl(StringRef Bytes, Type *Ty);
@@ -1057,41 +1057,43 @@ public:
/// all elements must be Constant's.
///
/// \param OnlyIfReducedTy see \a getWithOperands() docs.
- static Constant *getGetElementPtr(Constant *C, ArrayRef<Constant *> IdxList,
+ static Constant *getGetElementPtr(Type *Ty, Constant *C,
+ ArrayRef<Constant *> IdxList,
bool InBounds = false,
Type *OnlyIfReducedTy = nullptr) {
return getGetElementPtr(
- C, makeArrayRef((Value * const *)IdxList.data(), IdxList.size()),
+ Ty, C, makeArrayRef((Value * const *)IdxList.data(), IdxList.size()),
InBounds, OnlyIfReducedTy);
}
- static Constant *getGetElementPtr(Constant *C, Constant *Idx,
+ static Constant *getGetElementPtr(Type *Ty, Constant *C, Constant *Idx,
bool InBounds = false,
Type *OnlyIfReducedTy = nullptr) {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
- return getGetElementPtr(C, cast<Value>(Idx), InBounds, OnlyIfReducedTy);
+ return getGetElementPtr(Ty, C, cast<Value>(Idx), InBounds, OnlyIfReducedTy);
}
- static Constant *getGetElementPtr(Constant *C, ArrayRef<Value *> IdxList,
+ static Constant *getGetElementPtr(Type *Ty, Constant *C,
+ ArrayRef<Value *> IdxList,
bool InBounds = false,
Type *OnlyIfReducedTy = nullptr);
/// Create an "inbounds" getelementptr. See the documentation for the
/// "inbounds" flag in LangRef.html for details.
- static Constant *getInBoundsGetElementPtr(Constant *C,
+ static Constant *getInBoundsGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Constant *> IdxList) {
- return getGetElementPtr(C, IdxList, true);
+ return getGetElementPtr(Ty, C, IdxList, true);
}
- static Constant *getInBoundsGetElementPtr(Constant *C,
+ static Constant *getInBoundsGetElementPtr(Type *Ty, Constant *C,
Constant *Idx) {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
- return getGetElementPtr(C, Idx, true);
+ return getGetElementPtr(Ty, C, Idx, true);
}
- static Constant *getInBoundsGetElementPtr(Constant *C,
+ static Constant *getInBoundsGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> IdxList) {
- return getGetElementPtr(C, IdxList, true);
+ return getGetElementPtr(Ty, C, IdxList, true);
}
static Constant *getExtractElement(Constant *Vec, Constant *Idx,
diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h
index 97a7b83..db87654 100644
--- a/include/llvm/IR/DIBuilder.h
+++ b/include/llvm/IR/DIBuilder.h
@@ -30,38 +30,17 @@ namespace llvm {
class Value;
class Constant;
class LLVMContext;
- class MDNode;
class StringRef;
- class DIBasicType;
- class DICompileUnit;
- class DICompositeType;
- class DIDerivedType;
- class DIDescriptor;
- class DIFile;
- class DIEnumerator;
- class DIType;
- class DIGlobalVariable;
- class DIImportedEntity;
- class DINameSpace;
- class DIVariable;
- class DISubrange;
- class DILexicalBlockFile;
- class DILexicalBlock;
- class DIScope;
- class DISubprogram;
- class DITemplateTypeParameter;
- class DITemplateValueParameter;
- class DIObjCProperty;
class DIBuilder {
Module &M;
LLVMContext &VMContext;
- MDNode *TempEnumTypes;
- MDNode *TempRetainTypes;
- MDNode *TempSubprograms;
- MDNode *TempGVs;
- MDNode *TempImportedModules;
+ TempMDTuple TempEnumTypes;
+ TempMDTuple TempRetainTypes;
+ TempMDTuple TempSubprograms;
+ TempMDTuple TempGVs;
+ TempMDTuple TempImportedModules;
Function *DeclareFn; // llvm.dbg.declare
Function *ValueFn; // llvm.dbg.value
@@ -125,26 +104,25 @@ namespace llvm {
/// source location information in the back end
/// without actually changing the output (e.g.,
/// when using optimization remarks).
- DICompileUnit createCompileUnit(unsigned Lang, StringRef File,
- StringRef Dir, StringRef Producer,
- bool isOptimized, StringRef Flags,
- unsigned RV,
- StringRef SplitName = StringRef(),
- DebugEmissionKind Kind = FullDebug,
- bool EmitDebugInfo = true);
+ MDCompileUnit *createCompileUnit(unsigned Lang, StringRef File,
+ StringRef Dir, StringRef Producer,
+ bool isOptimized, StringRef Flags,
+ unsigned RV, StringRef SplitName = "",
+ DebugEmissionKind Kind = FullDebug,
+ bool EmitDebugInfo = true);
/// createFile - Create a file descriptor to hold debugging information
/// for a file.
- DIFile createFile(StringRef Filename, StringRef Directory);
+ MDFile *createFile(StringRef Filename, StringRef Directory);
/// createEnumerator - Create a single enumerator value.
- DIEnumerator createEnumerator(StringRef Name, int64_t Val);
+ MDEnumerator *createEnumerator(StringRef Name, int64_t Val);
/// \brief Create a DWARF unspecified type.
- DIBasicType createUnspecifiedType(StringRef Name);
+ MDBasicType *createUnspecifiedType(StringRef Name);
/// \brief Create C++11 nullptr type.
- DIBasicType createNullPtrType();
+ MDBasicType *createNullPtrType();
/// createBasicType - Create debugging information entry for a basic
/// type.
@@ -152,36 +130,36 @@ namespace llvm {
/// @param SizeInBits Size of the type.
/// @param AlignInBits Type alignment.
/// @param Encoding DWARF encoding code, e.g. dwarf::DW_ATE_float.
- DIBasicType createBasicType(StringRef Name, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Encoding);
+ MDBasicType *createBasicType(StringRef Name, uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Encoding);
/// createQualifiedType - Create debugging information entry for a qualified
/// type, e.g. 'const int'.
/// @param Tag Tag identifying type, e.g. dwarf::TAG_volatile_type
/// @param FromTy Base Type.
- DIDerivedType createQualifiedType(unsigned Tag, DIType FromTy);
+ MDDerivedType *createQualifiedType(unsigned Tag, MDType *FromTy);
/// createPointerType - Create debugging information entry for a pointer.
/// @param PointeeTy Type pointed by this pointer.
/// @param SizeInBits Size.
/// @param AlignInBits Alignment. (optional)
/// @param Name Pointer type name. (optional)
- DIDerivedType
- createPointerType(DIType PointeeTy, uint64_t SizeInBits,
- uint64_t AlignInBits = 0, StringRef Name = StringRef());
+ MDDerivedType *createPointerType(MDType *PointeeTy, uint64_t SizeInBits,
+ uint64_t AlignInBits = 0,
+ StringRef Name = "");
/// \brief Create debugging information entry for a pointer to member.
/// @param PointeeTy Type pointed to by this pointer.
/// @param SizeInBits Size.
/// @param AlignInBits Alignment. (optional)
/// @param Class Type for which this pointer points to members of.
- DIDerivedType createMemberPointerType(DIType PointeeTy, DIType Class,
- uint64_t SizeInBits,
- uint64_t AlignInBits = 0);
+ MDDerivedType *createMemberPointerType(MDType *PointeeTy, MDType *Class,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits = 0);
/// createReferenceType - Create debugging information entry for a c++
/// style reference or rvalue reference type.
- DIDerivedType createReferenceType(unsigned Tag, DIType RTy);
+ MDDerivedType *createReferenceType(unsigned Tag, MDType *RTy);
/// createTypedef - Create debugging information entry for a typedef.
/// @param Ty Original type.
@@ -189,11 +167,11 @@ namespace llvm {
/// @param File File where this type is defined.
/// @param LineNo Line number.
/// @param Context The surrounding context for the typedef.
- DIDerivedType createTypedef(DIType Ty, StringRef Name, DIFile File,
- unsigned LineNo, DIDescriptor Context);
+ MDDerivedType *createTypedef(MDType *Ty, StringRef Name, MDFile *File,
+ unsigned LineNo, MDScope *Context);
/// createFriend - Create debugging information entry for a 'friend'.
- DIDerivedType createFriend(DIType Ty, DIType FriendTy);
+ MDDerivedType *createFriend(MDType *Ty, MDType *FriendTy);
/// createInheritance - Create debugging information entry to establish
/// inheritance relationship between two types.
@@ -202,8 +180,8 @@ namespace llvm {
/// @param BaseOffset Base offset.
/// @param Flags Flags to describe inheritance attribute,
/// e.g. private
- DIDerivedType createInheritance(DIType Ty, DIType BaseTy,
- uint64_t BaseOffset, unsigned Flags);
+ MDDerivedType *createInheritance(MDType *Ty, MDType *BaseTy,
+ uint64_t BaseOffset, unsigned Flags);
/// createMemberType - Create debugging information entry for a member.
/// @param Scope Member scope.
@@ -215,10 +193,11 @@ namespace llvm {
/// @param OffsetInBits Member offset.
/// @param Flags Flags to encode member attribute, e.g. private
/// @param Ty Parent type.
- DIDerivedType
- createMemberType(DIDescriptor Scope, StringRef Name, DIFile File,
- unsigned LineNo, uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags, DIType Ty);
+ MDDerivedType *createMemberType(MDScope *Scope, StringRef Name,
+ MDFile *File, unsigned LineNo,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ MDType *Ty);
/// createStaticMemberType - Create debugging information entry for a
/// C++ static data member.
@@ -229,10 +208,10 @@ namespace llvm {
/// @param Ty Type of the static member.
/// @param Flags Flags to encode member attribute, e.g. private.
/// @param Val Const initializer of the member.
- DIDerivedType createStaticMemberType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNo,
- DIType Ty, unsigned Flags,
- llvm::Constant *Val);
+ MDDerivedType *createStaticMemberType(MDScope *Scope, StringRef Name,
+ MDFile *File, unsigned LineNo,
+ MDType *Ty, unsigned Flags,
+ llvm::Constant *Val);
/// createObjCIVar - Create debugging information entry for Objective-C
/// instance variable.
@@ -245,11 +224,10 @@ namespace llvm {
/// @param Flags Flags to encode member attribute, e.g. private
/// @param Ty Parent type.
/// @param PropertyNode Property associated with this ivar.
- DIDerivedType createObjCIVar(StringRef Name, DIFile File,
- unsigned LineNo, uint64_t SizeInBits,
- uint64_t AlignInBits, uint64_t OffsetInBits,
- unsigned Flags, DIType Ty,
- MDNode *PropertyNode);
+ MDDerivedType *createObjCIVar(StringRef Name, MDFile *File, unsigned LineNo,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ MDType *Ty, MDNode *PropertyNode);
/// createObjCProperty - Create debugging information entry for Objective-C
/// property.
@@ -260,12 +238,11 @@ namespace llvm {
/// @param SetterName Name of the Objective C property setter selector.
/// @param PropertyAttributes Objective C property attributes.
/// @param Ty Type.
- DIObjCProperty createObjCProperty(StringRef Name,
- DIFile File, unsigned LineNumber,
- StringRef GetterName,
- StringRef SetterName,
- unsigned PropertyAttributes,
- DIType Ty);
+ MDObjCProperty *createObjCProperty(StringRef Name, MDFile *File,
+ unsigned LineNumber,
+ StringRef GetterName,
+ StringRef SetterName,
+ unsigned PropertyAttributes, MDType *Ty);
/// createClassType - Create debugging information entry for a class.
/// @param Scope Scope in which this class is defined.
@@ -283,14 +260,14 @@ namespace llvm {
/// for more info.
/// @param TemplateParms Template type parameters.
/// @param UniqueIdentifier A unique identifier for the class.
- DICompositeType createClassType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType DerivedFrom, DIArray Elements,
- DIType VTableHolder = DIType(),
- MDNode *TemplateParms = nullptr,
- StringRef UniqueIdentifier = StringRef());
+ MDCompositeType *createClassType(MDScope *Scope, StringRef Name,
+ MDFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ MDType *DerivedFrom, DIArray Elements,
+ MDType *VTableHolder = nullptr,
+ MDNode *TemplateParms = nullptr,
+ StringRef UniqueIdentifier = "");
/// createStructType - Create debugging information entry for a struct.
/// @param Scope Scope in which this struct is defined.
@@ -303,13 +280,11 @@ namespace llvm {
/// @param Elements Struct elements.
/// @param RunTimeLang Optional parameter, Objective-C runtime version.
/// @param UniqueIdentifier A unique identifier for the struct.
- DICompositeType createStructType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- unsigned Flags, DIType DerivedFrom,
- DIArray Elements, unsigned RunTimeLang = 0,
- DIType VTableHolder = DIType(),
- StringRef UniqueIdentifier = StringRef());
+ MDCompositeType *createStructType(
+ MDScope *Scope, StringRef Name, MDFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags,
+ MDType *DerivedFrom, DIArray Elements, unsigned RunTimeLang = 0,
+ MDType *VTableHolder = nullptr, StringRef UniqueIdentifier = "");
/// createUnionType - Create debugging information entry for an union.
/// @param Scope Scope in which this union is defined.
@@ -322,19 +297,20 @@ namespace llvm {
/// @param Elements Union elements.
/// @param RunTimeLang Optional parameter, Objective-C runtime version.
/// @param UniqueIdentifier A unique identifier for the union.
- DICompositeType createUnionType(
- DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags,
- DIArray Elements, unsigned RunTimeLang = 0,
- StringRef UniqueIdentifier = StringRef());
+ MDCompositeType *createUnionType(MDScope *Scope, StringRef Name,
+ MDFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ unsigned Flags, DIArray Elements,
+ unsigned RunTimeLang = 0,
+ StringRef UniqueIdentifier = "");
/// createTemplateTypeParameter - Create debugging information for template
/// type parameter.
/// @param Scope Scope in which this type is defined.
/// @param Name Type parameter name.
/// @param Ty Parameter type.
- DITemplateTypeParameter
- createTemplateTypeParameter(DIDescriptor Scope, StringRef Name, DIType Ty);
+ MDTemplateTypeParameter *
+ createTemplateTypeParameter(MDScope *Scope, StringRef Name, MDType *Ty);
/// createTemplateValueParameter - Create debugging information for template
/// value parameter.
@@ -342,46 +318,46 @@ namespace llvm {
/// @param Name Value parameter name.
/// @param Ty Parameter type.
/// @param Val Constant parameter value.
- DITemplateValueParameter createTemplateValueParameter(DIDescriptor Scope,
- StringRef Name,
- DIType Ty,
- Constant *Val);
+ MDTemplateValueParameter *createTemplateValueParameter(MDScope *Scope,
+ StringRef Name,
+ MDType *Ty,
+ Constant *Val);
/// \brief Create debugging information for a template template parameter.
/// @param Scope Scope in which this type is defined.
/// @param Name Value parameter name.
/// @param Ty Parameter type.
/// @param Val The fully qualified name of the template.
- DITemplateValueParameter createTemplateTemplateParameter(DIDescriptor Scope,
- StringRef Name,
- DIType Ty,
- StringRef Val);
+ MDTemplateValueParameter *createTemplateTemplateParameter(MDScope *Scope,
+ StringRef Name,
+ MDType *Ty,
+ StringRef Val);
/// \brief Create debugging information for a template parameter pack.
/// @param Scope Scope in which this type is defined.
/// @param Name Value parameter name.
/// @param Ty Parameter type.
/// @param Val An array of types in the pack.
- DITemplateValueParameter createTemplateParameterPack(DIDescriptor Scope,
- StringRef Name,
- DIType Ty,
- DIArray Val);
+ MDTemplateValueParameter *createTemplateParameterPack(MDScope *Scope,
+ StringRef Name,
+ MDType *Ty,
+ DIArray Val);
/// createArrayType - Create debugging information entry for an array.
/// @param Size Array size.
/// @param AlignInBits Alignment.
/// @param Ty Element type.
/// @param Subscripts Subscripts.
- DICompositeType createArrayType(uint64_t Size, uint64_t AlignInBits,
- DIType Ty, DIArray Subscripts);
+ MDCompositeType *createArrayType(uint64_t Size, uint64_t AlignInBits,
+ MDType *Ty, DIArray Subscripts);
/// createVectorType - Create debugging information entry for a vector type.
/// @param Size Array size.
/// @param AlignInBits Alignment.
/// @param Ty Element type.
/// @param Subscripts Subscripts.
- DICompositeType createVectorType(uint64_t Size, uint64_t AlignInBits,
- DIType Ty, DIArray Subscripts);
+ MDCompositeType *createVectorType(uint64_t Size, uint64_t AlignInBits,
+ MDType *Ty, DIArray Subscripts);
/// createEnumerationType - Create debugging information entry for an
/// enumeration.
@@ -394,10 +370,10 @@ namespace llvm {
/// @param Elements Enumeration elements.
/// @param UnderlyingType Underlying type of a C++11/ObjC fixed enum.
/// @param UniqueIdentifier A unique identifier for the enum.
- DICompositeType createEnumerationType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber, uint64_t SizeInBits,
- uint64_t AlignInBits, DIArray Elements, DIType UnderlyingType,
- StringRef UniqueIdentifier = StringRef());
+ MDCompositeType *createEnumerationType(
+ MDScope *Scope, StringRef Name, MDFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
+ MDType *UnderlyingType, StringRef UniqueIdentifier = "");
/// createSubroutineType - Create subroutine type.
/// @param File File in which this subroutine is defined.
@@ -405,39 +381,39 @@ namespace llvm {
/// includes return type at 0th index.
/// @param Flags E.g.: LValueReference.
/// These flags are used to emit dwarf attributes.
- DISubroutineType createSubroutineType(DIFile File,
- DITypeArray ParameterTypes,
- unsigned Flags = 0);
+ MDSubroutineType *createSubroutineType(MDFile *File,
+ DITypeArray ParameterTypes,
+ unsigned Flags = 0);
- /// createArtificialType - Create a new DIType with "artificial" flag set.
- DIType createArtificialType(DIType Ty);
+ /// createArtificialType - Create a new MDType with "artificial" flag set.
+ MDType *createArtificialType(MDType *Ty);
- /// createObjectPointerType - Create a new DIType with the "object pointer"
+ /// createObjectPointerType - Create a new MDType with the "object pointer"
/// flag set.
- DIType createObjectPointerType(DIType Ty);
+ MDType *createObjectPointerType(MDType *Ty);
/// \brief Create a permanent forward-declared type.
- DICompositeType createForwardDecl(unsigned Tag, StringRef Name,
- DIDescriptor Scope, DIFile F,
- unsigned Line, unsigned RuntimeLang = 0,
- uint64_t SizeInBits = 0,
- uint64_t AlignInBits = 0,
- StringRef UniqueIdentifier = StringRef());
+ MDCompositeType *createForwardDecl(unsigned Tag, StringRef Name,
+ MDScope *Scope, MDFile *F, unsigned Line,
+ unsigned RuntimeLang = 0,
+ uint64_t SizeInBits = 0,
+ uint64_t AlignInBits = 0,
+ StringRef UniqueIdentifier = "");
/// \brief Create a temporary forward-declared type.
- DICompositeType createReplaceableCompositeType(
- unsigned Tag, StringRef Name, DIDescriptor Scope, DIFile F,
- unsigned Line, unsigned RuntimeLang = 0, uint64_t SizeInBits = 0,
- uint64_t AlignInBits = 0, unsigned Flags = DIDescriptor::FlagFwdDecl,
- StringRef UniqueIdentifier = StringRef());
+ MDCompositeType *createReplaceableCompositeType(
+ unsigned Tag, StringRef Name, MDScope *Scope, MDFile *F, unsigned Line,
+ unsigned RuntimeLang = 0, uint64_t SizeInBits = 0,
+ uint64_t AlignInBits = 0, unsigned Flags = DebugNode::FlagFwdDecl,
+ StringRef UniqueIdentifier = "");
- /// retainType - Retain DIType in a module even if it is not referenced
+ /// retainType - Retain an MDType in a module even if it is not referenced
/// through debug info anchors.
- void retainType(DIType T);
+ void retainType(MDType *T);
/// createUnspecifiedParameter - Create unspecified parameter type
/// for a subroutine type.
- DIBasicType createUnspecifiedParameter();
+ MDBasicType *createUnspecifiedParameter();
/// getOrCreateArray - Get a DIArray, create one if required.
DIArray getOrCreateArray(ArrayRef<Metadata *> Elements);
@@ -447,8 +423,7 @@ namespace llvm {
/// getOrCreateSubrange - Create a descriptor for a value range. This
/// implicitly uniques the values returned.
- DISubrange getOrCreateSubrange(int64_t Lo, int64_t Count);
-
+ MDSubrange *getOrCreateSubrange(int64_t Lo, int64_t Count);
/// createGlobalVariable - Create a new descriptor for the specified
/// variable.
@@ -462,19 +437,19 @@ namespace llvm {
/// externally visible or not.
/// @param Val llvm::Value of the variable.
/// @param Decl Reference to the corresponding declaration.
- DIGlobalVariable createGlobalVariable(DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile File,
- unsigned LineNo, DITypeRef Ty,
- bool isLocalToUnit,
- llvm::Constant *Val,
- MDNode *Decl = nullptr);
+ MDGlobalVariable *createGlobalVariable(MDScope *Context, StringRef Name,
+ StringRef LinkageName, MDFile *File,
+ unsigned LineNo, MDType *Ty,
+ bool isLocalToUnit,
+ llvm::Constant *Val,
+ MDNode *Decl = nullptr);
/// createTempGlobalVariableFwdDecl - Identical to createGlobalVariable
/// except that the resulting DbgNode is temporary and meant to be RAUWed.
- DIGlobalVariable createTempGlobalVariableFwdDecl(
- DIDescriptor Context, StringRef Name, StringRef LinkageName,
- DIFile File, unsigned LineNo, DITypeRef Ty, bool isLocalToUnit,
- llvm::Constant *Val, MDNode *Decl = nullptr);
+ MDGlobalVariable *createTempGlobalVariableFwdDecl(
+ MDScope *Context, StringRef Name, StringRef LinkageName, MDFile *File,
+ unsigned LineNo, MDType *Ty, bool isLocalToUnit, llvm::Constant *Val,
+ MDNode *Decl = nullptr);
/// createLocalVariable - Create a new descriptor for the specified
/// local variable.
@@ -490,29 +465,29 @@ namespace llvm {
/// @param Flags Flags, e.g. artificial variable.
/// @param ArgNo If this variable is an argument then this argument's
/// number. 1 indicates 1st argument.
- DIVariable createLocalVariable(unsigned Tag, DIDescriptor Scope,
- StringRef Name,
- DIFile File, unsigned LineNo,
- DITypeRef Ty, bool AlwaysPreserve = false,
- unsigned Flags = 0,
- unsigned ArgNo = 0);
+ MDLocalVariable *createLocalVariable(unsigned Tag, MDScope *Scope,
+ StringRef Name, MDFile *File,
+ unsigned LineNo, MDType *Ty,
+ bool AlwaysPreserve = false,
+ unsigned Flags = 0,
+ unsigned ArgNo = 0);
/// createExpression - Create a new descriptor for the specified
/// variable which has a complex address expression for its address.
/// @param Addr An array of complex address operations.
- DIExpression createExpression(ArrayRef<uint64_t> Addr = None);
- DIExpression createExpression(ArrayRef<int64_t> Addr);
+ MDExpression *createExpression(ArrayRef<uint64_t> Addr = None);
+ MDExpression *createExpression(ArrayRef<int64_t> Addr);
/// createBitPieceExpression - Create a descriptor to describe one part
/// of aggregate variable that is fragmented across multiple Values.
///
/// @param OffsetInBits Offset of the piece in bits.
/// @param SizeInBits Size of the piece in bits.
- DIExpression createBitPieceExpression(unsigned OffsetInBits,
- unsigned SizeInBits);
+ MDExpression *createBitPieceExpression(unsigned OffsetInBits,
+ unsigned SizeInBits);
/// createFunction - Create a new descriptor for the specified subprogram.
- /// See comments in DISubprogram for descriptions of these fields.
+ /// See comments in MDSubprogram for descriptions of these fields.
/// @param Scope Function scope.
/// @param Name Function name.
/// @param LinkageName Mangled function name.
@@ -527,49 +502,35 @@ namespace llvm {
/// @param isOptimized True if optimization is ON.
/// @param Fn llvm::Function pointer.
/// @param TParam Function template parameters.
- DISubprogram createFunction(DIDescriptor Scope, StringRef Name,
- StringRef LinkageName,
- DIFile File, unsigned LineNo,
- DICompositeType Ty, bool isLocalToUnit,
- bool isDefinition,
- unsigned ScopeLine,
- unsigned Flags = 0,
- bool isOptimized = false,
- Function *Fn = nullptr,
- MDNode *TParam = nullptr,
- MDNode *Decl = nullptr);
+ MDSubprogram *
+ createFunction(MDScope *Scope, StringRef Name, StringRef LinkageName,
+ MDFile *File, unsigned LineNo, MDSubroutineType *Ty,
+ bool isLocalToUnit, bool isDefinition, unsigned ScopeLine,
+ unsigned Flags = 0, bool isOptimized = false,
+ Function *Fn = nullptr, MDNode *TParam = nullptr,
+ MDNode *Decl = nullptr);
/// createTempFunctionFwdDecl - Identical to createFunction,
/// except that the resulting DbgNode is meant to be RAUWed.
- DISubprogram createTempFunctionFwdDecl(DIDescriptor Scope, StringRef Name,
- StringRef LinkageName,
- DIFile File, unsigned LineNo,
- DICompositeType Ty, bool isLocalToUnit,
- bool isDefinition,
- unsigned ScopeLine,
- unsigned Flags = 0,
- bool isOptimized = false,
- Function *Fn = nullptr,
- MDNode *TParam = nullptr,
- MDNode *Decl = nullptr);
-
+ MDSubprogram *createTempFunctionFwdDecl(
+ MDScope *Scope, StringRef Name, StringRef LinkageName, MDFile *File,
+ unsigned LineNo, MDSubroutineType *Ty, bool isLocalToUnit,
+ bool isDefinition, unsigned ScopeLine, unsigned Flags = 0,
+ bool isOptimized = false, Function *Fn = nullptr,
+ MDNode *TParam = nullptr, MDNode *Decl = nullptr);
/// FIXME: this is added for dragonegg. Once we update dragonegg
/// to call resolve function, this will be removed.
- DISubprogram createFunction(DIScopeRef Scope, StringRef Name,
- StringRef LinkageName,
- DIFile File, unsigned LineNo,
- DICompositeType Ty, bool isLocalToUnit,
- bool isDefinition,
- unsigned ScopeLine,
- unsigned Flags = 0,
- bool isOptimized = false,
- Function *Fn = nullptr,
- MDNode *TParam = nullptr,
- MDNode *Decl = nullptr);
+ MDSubprogram *
+ createFunction(DIScopeRef Scope, StringRef Name, StringRef LinkageName,
+ MDFile *File, unsigned LineNo, MDSubroutineType *Ty,
+ bool isLocalToUnit, bool isDefinition, unsigned ScopeLine,
+ unsigned Flags = 0, bool isOptimized = false,
+ Function *Fn = nullptr, MDNode *TParam = nullptr,
+ MDNode *Decl = nullptr);
/// createMethod - Create a new descriptor for the specified C++ method.
- /// See comments in DISubprogram for descriptions of these fields.
+ /// See comments in MDSubprogram for descriptions of these fields.
/// @param Scope Function scope.
/// @param Name Function name.
/// @param LinkageName Mangled function name.
@@ -587,17 +548,13 @@ namespace llvm {
/// @param isOptimized True if optimization is ON.
/// @param Fn llvm::Function pointer.
/// @param TParam Function template parameters.
- DISubprogram createMethod(DIDescriptor Scope, StringRef Name,
- StringRef LinkageName,
- DIFile File, unsigned LineNo,
- DICompositeType Ty, bool isLocalToUnit,
- bool isDefinition,
- unsigned Virtuality = 0, unsigned VTableIndex = 0,
- DIType VTableHolder = DIType(),
- unsigned Flags = 0,
- bool isOptimized = false,
- Function *Fn = nullptr,
- MDNode *TParam = nullptr);
+ MDSubprogram *
+ createMethod(MDScope *Scope, StringRef Name, StringRef LinkageName,
+ MDFile *File, unsigned LineNo, MDSubroutineType *Ty,
+ bool isLocalToUnit, bool isDefinition, unsigned Virtuality = 0,
+ unsigned VTableIndex = 0, MDType *VTableHolder = nullptr,
+ unsigned Flags = 0, bool isOptimized = false,
+ Function *Fn = nullptr, MDNode *TParam = nullptr);
/// createNameSpace - This creates new descriptor for a namespace
/// with the specified parent scope.
@@ -605,9 +562,8 @@ namespace llvm {
/// @param Name Name of this namespace
/// @param File Source file
/// @param LineNo Line number
- DINameSpace createNameSpace(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNo);
-
+ MDNamespace *createNameSpace(MDScope *Scope, StringRef Name, MDFile *File,
+ unsigned LineNo);
/// createLexicalBlockFile - This creates a descriptor for a lexical
/// block with a new file attached. This merely extends the existing
@@ -615,8 +571,8 @@ namespace llvm {
/// @param Scope Lexical block.
/// @param File Source file.
/// @param Discriminator DWARF path discriminator value.
- DILexicalBlockFile createLexicalBlockFile(DIDescriptor Scope, DIFile File,
- unsigned Discriminator = 0);
+ MDLexicalBlockFile *createLexicalBlockFile(MDScope *Scope, MDFile *File,
+ unsigned Discriminator = 0);
/// createLexicalBlock - This creates a descriptor for a lexical block
/// with the specified parent context.
@@ -624,60 +580,63 @@ namespace llvm {
/// @param File Source file.
/// @param Line Line number.
/// @param Col Column number.
- DILexicalBlock createLexicalBlock(DIDescriptor Scope, DIFile File,
- unsigned Line, unsigned Col);
+ MDLexicalBlock *createLexicalBlock(MDScope *Scope, MDFile *File,
+ unsigned Line, unsigned Col);
/// \brief Create a descriptor for an imported module.
/// @param Context The scope this module is imported into
/// @param NS The namespace being imported here
/// @param Line Line number
- DIImportedEntity createImportedModule(DIScope Context, DINameSpace NS,
- unsigned Line);
+ MDImportedEntity *createImportedModule(MDScope *Context, MDNamespace *NS,
+ unsigned Line);
/// \brief Create a descriptor for an imported module.
/// @param Context The scope this module is imported into
/// @param NS An aliased namespace
/// @param Line Line number
- DIImportedEntity createImportedModule(DIScope Context, DIImportedEntity NS,
- unsigned Line);
+ MDImportedEntity *createImportedModule(MDScope *Context,
+ MDImportedEntity *NS, unsigned Line);
/// \brief Create a descriptor for an imported function.
/// @param Context The scope this module is imported into
/// @param Decl The declaration (or definition) of a function, type, or
/// variable
/// @param Line Line number
- DIImportedEntity createImportedDeclaration(DIScope Context, DIDescriptor Decl,
- unsigned Line,
- StringRef Name = StringRef());
- DIImportedEntity createImportedDeclaration(DIScope Context,
- DIImportedEntity NS,
- unsigned Line,
- StringRef Name = StringRef());
+ MDImportedEntity *createImportedDeclaration(MDScope *Context,
+ DebugNode *Decl, unsigned Line,
+ StringRef Name = "");
/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
/// @param Storage llvm::Value of the variable
/// @param VarInfo Variable's debug info descriptor.
/// @param Expr A complex location expression.
+ /// @param DL Debug info location.
/// @param InsertAtEnd Location for the new intrinsic.
- Instruction *insertDeclare(llvm::Value *Storage, DIVariable VarInfo,
- DIExpression Expr, BasicBlock *InsertAtEnd);
+ Instruction *insertDeclare(llvm::Value *Storage, MDLocalVariable *VarInfo,
+ MDExpression *Expr, const MDLocation *DL,
+ BasicBlock *InsertAtEnd);
/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
/// @param Storage llvm::Value of the variable
/// @param VarInfo Variable's debug info descriptor.
/// @param Expr A complex location expression.
+ /// @param DL Debug info location.
/// @param InsertBefore Location for the new intrinsic.
- Instruction *insertDeclare(llvm::Value *Storage, DIVariable VarInfo,
- DIExpression Expr, Instruction *InsertBefore);
+ Instruction *insertDeclare(llvm::Value *Storage, MDLocalVariable *VarInfo,
+ MDExpression *Expr, const MDLocation *DL,
+ Instruction *InsertBefore);
/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
/// @param Val llvm::Value of the variable
/// @param Offset Offset
/// @param VarInfo Variable's debug info descriptor.
/// @param Expr A complex location expression.
+ /// @param DL Debug info location.
/// @param InsertAtEnd Location for the new intrinsic.
Instruction *insertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset,
- DIVariable VarInfo, DIExpression Expr,
+ MDLocalVariable *VarInfo,
+ MDExpression *Expr,
+ const MDLocation *DL,
BasicBlock *InsertAtEnd);
/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
@@ -685,24 +644,45 @@ namespace llvm {
/// @param Offset Offset
/// @param VarInfo Variable's debug info descriptor.
/// @param Expr A complex location expression.
+ /// @param DL Debug info location.
/// @param InsertBefore Location for the new intrinsic.
Instruction *insertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset,
- DIVariable VarInfo, DIExpression Expr,
+ MDLocalVariable *VarInfo,
+ MDExpression *Expr,
+ const MDLocation *DL,
Instruction *InsertBefore);
/// \brief Replace the vtable holder in the given composite type.
///
/// If this creates a self reference, it may orphan some unresolved cycles
/// in the operands of \c T, so \a DIBuilder needs to track that.
- void replaceVTableHolder(DICompositeType &T, DICompositeType VTableHolder);
+ void replaceVTableHolder(MDCompositeType *&T,
+ MDCompositeType *VTableHolder);
/// \brief Replace arrays on a composite type.
///
/// If \c T is resolved, but the arrays aren't -- which can happen if \c T
/// has a self-reference -- \a DIBuilder needs to track the array to
/// resolve cycles.
- void replaceArrays(DICompositeType &T, DIArray Elements,
+ void replaceArrays(MDCompositeType *&T, DIArray Elements,
DIArray TParems = DIArray());
+
+ /// \brief Replace a temporary node.
+ ///
+ /// Call \a MDNode::replaceAllUsesWith() on \c N, replacing it with \c
+ /// Replacement.
+ ///
+ /// If \c Replacement is the same as \c N.get(), instead call \a
+ /// MDNode::replaceWithUniqued(). In this case, the uniqued node could
+ /// have a different address, so we return the final address.
+ template <class NodeTy>
+ NodeTy *replaceTemporary(TempMDNode &&N, NodeTy *Replacement) {
+ if (N.get() == Replacement)
+ return cast<NodeTy>(MDNode::replaceWithUniqued(std::move(N)));
+
+ N->replaceAllUsesWith(Replacement);
+ return Replacement;
+ }
};
} // end namespace llvm
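A minimal usage sketch of the pointer-based DIBuilder API introduced above (not
part of the patch). It assumes DIB is a DIBuilder and that CU, File, IntTy,
Alloca, Var, Loc and Entry were obtained earlier (e.g. via createCompileUnit,
createFile, createBasicType, an alloca, createLocalVariable, an MDLocation, and
the function's entry block); only signatures shown in this diff are exercised:

    // Create a struct type and one member with the new MD*-returning calls.
    MDCompositeType *S = DIB.createStructType(
        CU, "S", File, /*LineNumber=*/1, /*SizeInBits=*/32, /*AlignInBits=*/32,
        /*Flags=*/0, /*DerivedFrom=*/nullptr, DIB.getOrCreateArray({}));
    MDDerivedType *Member = DIB.createMemberType(
        S, "x", File, /*LineNo=*/2, /*SizeInBits=*/32, /*AlignInBits=*/32,
        /*OffsetInBits=*/0, /*Flags=*/0, IntTy);
    // replaceArrays takes the composite by reference so it can re-unique it.
    DIB.replaceArrays(S, DIB.getOrCreateArray({Member}));

    // insertDeclare now also takes the debug location (Loc) explicitly.
    DIB.insertDeclare(Alloca, Var, DIB.createExpression(), Loc, Entry);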
diff --git a/include/llvm/IR/DebugInfo.h b/include/llvm/IR/DebugInfo.h
index 0163c05..18b0c72 100644
--- a/include/llvm/IR/DebugInfo.h
+++ b/include/llvm/IR/DebugInfo.h
@@ -58,1292 +58,365 @@ class DIObjCProperty;
/// \brief Maps from type identifier to the actual MDNode.
typedef DenseMap<const MDString *, MDNode *> DITypeIdentifierMap;
-class DIHeaderFieldIterator
- : public std::iterator<std::input_iterator_tag, StringRef, std::ptrdiff_t,
- const StringRef *, StringRef> {
- StringRef Header;
- StringRef Current;
+class DIDescriptor {
+ MDNode *N;
public:
- DIHeaderFieldIterator() {}
- explicit DIHeaderFieldIterator(StringRef Header)
- : Header(Header), Current(Header.slice(0, Header.find('\0'))) {}
- StringRef operator*() const { return Current; }
- const StringRef *operator->() const { return &Current; }
- DIHeaderFieldIterator &operator++() {
- increment();
- return *this;
- }
- DIHeaderFieldIterator operator++(int) {
- DIHeaderFieldIterator X(*this);
- increment();
- return X;
- }
- bool operator==(const DIHeaderFieldIterator &X) const {
- return Current.data() == X.Current.data();
- }
- bool operator!=(const DIHeaderFieldIterator &X) const {
- return !(*this == X);
- }
+ DIDescriptor(const MDNode *N = nullptr) : N(const_cast<MDNode *>(N)) {}
- StringRef getHeader() const { return Header; }
- StringRef getCurrent() const { return Current; }
- StringRef getPrefix() const {
- if (Current.begin() == Header.begin())
- return StringRef();
- return Header.slice(0, Current.begin() - Header.begin() - 1);
- }
- StringRef getSuffix() const {
- if (Current.end() == Header.end())
- return StringRef();
- return Header.slice(Current.end() - Header.begin() + 1, StringRef::npos);
- }
-
- /// \brief Get the current field as a number.
- ///
- /// Convert the current field into a number. Return \c 0 on error.
- template <class T> T getNumber() const {
- T Int;
- if (getCurrent().getAsInteger(0, Int))
- return 0;
- return Int;
- }
-
-private:
- void increment() {
- assert(Current.data() != nullptr && "Cannot increment past the end");
- StringRef Suffix = getSuffix();
- Current = Suffix.slice(0, Suffix.find('\0'));
- }
+ operator MDNode *() const { return N; }
+ MDNode *operator->() const { return N; }
+ MDNode &operator*() const { return *N; }
};
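A sketch (not part of the patch) of what the slimmed-down wrapper now amounts
to; N is assumed to be any MDNode* carrying debug info:

    DIDescriptor D(N);   // thin wrapper: stores the node, interprets nothing
    MDNode *Raw = D;     // operator MDNode*() hands the pointer straight back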
-/// \brief A thin wraper around MDNode to access encoded debug info.
-///
-/// This should not be stored in a container, because the underlying MDNode may
-/// change in certain situations.
-class DIDescriptor {
- // Befriends DIRef so DIRef can befriend the protected member
- // function: getFieldAs<DIRef>.
- template <typename T> friend class DIRef;
+#define DECLARE_SIMPLIFY_DESCRIPTOR(DESC) \
+ class DESC; \
+ template <> struct simplify_type<const DESC>; \
+ template <> struct simplify_type<DESC>;
+DECLARE_SIMPLIFY_DESCRIPTOR(DIDescriptor)
+DECLARE_SIMPLIFY_DESCRIPTOR(DISubrange)
+DECLARE_SIMPLIFY_DESCRIPTOR(DIEnumerator)
+DECLARE_SIMPLIFY_DESCRIPTOR(DIScope)
+DECLARE_SIMPLIFY_DESCRIPTOR(DIType)
+DECLARE_SIMPLIFY_DESCRIPTOR(DIBasicType)
+DECLARE_SIMPLIFY_DESCRIPTOR(DIDerivedType)
+DECLARE_SIMPLIFY_DESCRIPTOR(DICompositeType)
+DECLARE_SIMPLIFY_DESCRIPTOR(DISubroutineType)
+DECLARE_SIMPLIFY_DESCRIPTOR(DIFile)
+DECLARE_SIMPLIFY_DESCRIPTOR(DICompileUnit)
+DECLARE_SIMPLIFY_DESCRIPTOR(DISubprogram)
+DECLARE_SIMPLIFY_DESCRIPTOR(DILexicalBlock)
+DECLARE_SIMPLIFY_DESCRIPTOR(DILexicalBlockFile)
+DECLARE_SIMPLIFY_DESCRIPTOR(DINameSpace)
+DECLARE_SIMPLIFY_DESCRIPTOR(DITemplateTypeParameter)
+DECLARE_SIMPLIFY_DESCRIPTOR(DITemplateValueParameter)
+DECLARE_SIMPLIFY_DESCRIPTOR(DIGlobalVariable)
+DECLARE_SIMPLIFY_DESCRIPTOR(DIVariable)
+DECLARE_SIMPLIFY_DESCRIPTOR(DIExpression)
+DECLARE_SIMPLIFY_DESCRIPTOR(DILocation)
+DECLARE_SIMPLIFY_DESCRIPTOR(DIObjCProperty)
+DECLARE_SIMPLIFY_DESCRIPTOR(DIImportedEntity)
+#undef DECLARE_SIMPLIFY_DESCRIPTOR
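For reference (not part of the patch), each DECLARE_SIMPLIFY_DESCRIPTOR(DESC)
above expands, per the macro defined just before the list, to a forward
declaration plus two simplify_type specializations, e.g. for DIType:

    class DIType;
    template <> struct simplify_type<const DIType>;
    template <> struct simplify_type<DIType>;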
+
+typedef DebugNodeArray DIArray;
+typedef MDTypeRefArray DITypeArray;
+typedef DebugNodeRef DIDescriptorRef;
+typedef MDScopeRef DIScopeRef;
+typedef MDTypeRef DITypeRef;
+
+class DISubrange {
+ MDSubrange *N;
public:
- /// \brief Accessibility flags.
- ///
- /// The three accessibility flags are mutually exclusive and rolled together
- /// in the first two bits.
- enum {
-#define HANDLE_DI_FLAG(ID, NAME) Flag##NAME = ID,
-#include "llvm/IR/DebugInfoFlags.def"
- FlagAccessibility = FlagPrivate | FlagProtected | FlagPublic
- };
-
- static unsigned getFlag(StringRef Flag);
- static const char *getFlagString(unsigned Flag);
-
- /// \brief Split up a flags bitfield.
- ///
- /// Split \c Flags into \c SplitFlags, a vector of its components. Returns
- /// any remaining (unrecognized) bits.
- static unsigned splitFlags(unsigned Flags,
- SmallVectorImpl<unsigned> &SplitFlags);
-
-protected:
- const MDNode *DbgNode;
-
- StringRef getStringField(unsigned Elt) const;
- unsigned getUnsignedField(unsigned Elt) const {
- return (unsigned)getUInt64Field(Elt);
- }
- uint64_t getUInt64Field(unsigned Elt) const;
- int64_t getInt64Field(unsigned Elt) const;
- DIDescriptor getDescriptorField(unsigned Elt) const;
+ DISubrange(const MDSubrange *N = nullptr) : N(const_cast<MDSubrange *>(N)) {}
- template <typename DescTy> DescTy getFieldAs(unsigned Elt) const {
- return DescTy(getDescriptorField(Elt));
- }
+ operator MDSubrange *() const { return N; }
+ MDSubrange *operator->() const { return N; }
+ MDSubrange &operator*() const { return *N; }
+};
- GlobalVariable *getGlobalVariableField(unsigned Elt) const;
- Constant *getConstantField(unsigned Elt) const;
- Function *getFunctionField(unsigned Elt) const;
+class DIEnumerator {
+ MDEnumerator *N;
public:
- explicit DIDescriptor(const MDNode *N = nullptr) : DbgNode(N) {}
-
- bool Verify() const;
-
- MDNode *get() const { return const_cast<MDNode *>(DbgNode); }
- operator MDNode *() const { return get(); }
- MDNode *operator->() const { return get(); }
- MDNode &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- // An explicit operator bool so that we can do testing of DI values
- // easily.
- // FIXME: This operator bool isn't actually protecting anything at the
- // moment due to the conversion operator above making DIDescriptor nodes
- // implicitly convertable to bool.
- explicit operator bool() const { return DbgNode != nullptr; }
-
- bool operator==(DIDescriptor Other) const { return DbgNode == Other.DbgNode; }
- bool operator!=(DIDescriptor Other) const { return !operator==(Other); }
-
- StringRef getHeader() const { return getStringField(0); }
-
- size_t getNumHeaderFields() const {
- return std::distance(DIHeaderFieldIterator(getHeader()),
- DIHeaderFieldIterator());
- }
-
- DIHeaderFieldIterator header_begin() const {
- return DIHeaderFieldIterator(getHeader());
- }
- DIHeaderFieldIterator header_end() const { return DIHeaderFieldIterator(); }
-
- DIHeaderFieldIterator getHeaderIterator(unsigned Index) const {
- // Since callers expect an empty string for out-of-range accesses, we can't
- // use std::advance() here.
- for (auto I = header_begin(), E = header_end(); I != E; ++I, --Index)
- if (!Index)
- return I;
- return header_end();
- }
-
- StringRef getHeaderField(unsigned Index) const {
- return *getHeaderIterator(Index);
- }
-
- template <class T> T getHeaderFieldAs(unsigned Index) const {
- return getHeaderIterator(Index).getNumber<T>();
- }
-
- uint16_t getTag() const {
- if (auto *N = dyn_cast_or_null<DebugNode>(get()))
- return N->getTag();
- return 0;
- }
+ DIEnumerator(const MDEnumerator *N = nullptr)
+ : N(const_cast<MDEnumerator *>(N)) {}
- bool isDerivedType() const { return get() && isa<MDDerivedTypeBase>(get()); }
- bool isCompositeType() const {
- return get() && isa<MDCompositeTypeBase>(get());
- }
- bool isSubroutineType() const {
- return get() && isa<MDSubroutineType>(get());
- }
- bool isBasicType() const { return get() && isa<MDBasicType>(get()); }
- bool isVariable() const { return get() && isa<MDLocalVariable>(get()); }
- bool isSubprogram() const { return get() && isa<MDSubprogram>(get()); }
- bool isGlobalVariable() const {
- return get() && isa<MDGlobalVariable>(get());
- }
- bool isScope() const { return get() && isa<MDScope>(get()); }
- bool isFile() const { return get() && isa<MDFile>(get()); }
- bool isCompileUnit() const { return get() && isa<MDCompileUnit>(get()); }
- bool isNameSpace() const{ return get() && isa<MDNamespace>(get()); }
- bool isLexicalBlockFile() const {
- return get() && isa<MDLexicalBlockFile>(get());
- }
- bool isLexicalBlock() const {
- return get() && isa<MDLexicalBlockBase>(get());
- }
- bool isSubrange() const { return get() && isa<MDSubrange>(get()); }
- bool isEnumerator() const { return get() && isa<MDEnumerator>(get()); }
- bool isType() const { return get() && isa<MDType>(get()); }
- bool isTemplateTypeParameter() const {
- return get() && isa<MDTemplateTypeParameter>(get());
- }
- bool isTemplateValueParameter() const {
- return get() && isa<MDTemplateValueParameter>(get());
- }
- bool isObjCProperty() const { return get() && isa<MDObjCProperty>(get()); }
- bool isImportedEntity() const {
- return get() && isa<MDImportedEntity>(get());
- }
- bool isExpression() const { return get() && isa<MDExpression>(get()); }
-
- void print(raw_ostream &OS) const;
- void dump() const;
-
- /// \brief Replace all uses of debug info referenced by this descriptor.
- void replaceAllUsesWith(LLVMContext &VMContext, DIDescriptor D);
- void replaceAllUsesWith(MDNode *D);
+ operator MDEnumerator *() const { return N; }
+ MDEnumerator *operator->() const { return N; }
+ MDEnumerator &operator*() const { return *N; }
};
-#define RETURN_FROM_RAW(VALID, UNUSED) \
- do { \
- auto *N = get(); \
- assert(N && "Expected non-null in accessor"); \
- return VALID; \
- } while (false)
-#define RETURN_DESCRIPTOR_FROM_RAW(DESC, VALID) \
- do { \
- auto *N = get(); \
- assert(N && "Expected non-null in accessor"); \
- return DESC(dyn_cast_or_null<MDNode>(VALID)); \
- } while (false)
-#define RETURN_REF_FROM_RAW(REF, VALID) \
- do { \
- auto *N = get(); \
- assert(N && "Expected non-null in accessor"); \
- return REF::get(VALID); \
- } while (false)
-
-/// \brief This is used to represent ranges, for array bounds.
-class DISubrange : public DIDescriptor {
-public:
- explicit DISubrange(const MDNode *N = nullptr) : DIDescriptor(N) {}
- DISubrange(const MDSubrange *N) : DIDescriptor(N) {}
-
- MDSubrange *get() const {
- return cast_or_null<MDSubrange>(DIDescriptor::get());
- }
- operator MDSubrange *() const { return get(); }
- MDSubrange *operator->() const { return get(); }
- MDSubrange &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- int64_t getLo() const { RETURN_FROM_RAW(N->getLo(), 0); }
- int64_t getCount() const { RETURN_FROM_RAW(N->getCount(), 0); }
- bool Verify() const;
-};
+class DIScope {
+ MDScope *N;
-/// \brief This descriptor holds an array of nodes with type T.
-template <typename T> class DITypedArray : public DIDescriptor {
public:
- explicit DITypedArray(const MDNode *N = nullptr) : DIDescriptor(N) {}
- unsigned getNumElements() const {
- return DbgNode ? DbgNode->getNumOperands() : 0;
- }
- T getElement(unsigned Idx) const { return getFieldAs<T>(Idx); }
+ DIScope(const MDScope *N = nullptr) : N(const_cast<MDScope *>(N)) {}
+
+ operator DIDescriptor() const { return N; }
+ operator MDScope *() const { return N; }
+ MDScope *operator->() const { return N; }
+ MDScope &operator*() const { return *N; }
};
-typedef DITypedArray<DIDescriptor> DIArray;
+class DIType {
+ MDType *N;
-/// \brief A wrapper for an enumerator (e.g. X and Y in 'enum {X,Y}').
-///
-/// FIXME: it seems strange that this doesn't have either a reference to the
-/// type/precision or a file/line pair for location info.
-class DIEnumerator : public DIDescriptor {
public:
- explicit DIEnumerator(const MDNode *N = nullptr) : DIDescriptor(N) {}
- DIEnumerator(const MDEnumerator *N) : DIDescriptor(N) {}
-
- MDEnumerator *get() const {
- return cast_or_null<MDEnumerator>(DIDescriptor::get());
- }
- operator MDEnumerator *() const { return get(); }
- MDEnumerator *operator->() const { return get(); }
- MDEnumerator &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
+ DIType(const MDType *N = nullptr) : N(const_cast<MDType *>(N)) {}
- StringRef getName() const { RETURN_FROM_RAW(N->getName(), ""); }
- int64_t getEnumValue() const { RETURN_FROM_RAW(N->getValue(), 0); }
- bool Verify() const;
+ operator DIDescriptor() const { return N; }
+ operator DIScope() const { return N; }
+ operator MDType *() const { return N; }
+ MDType *operator->() const { return N; }
+ MDType &operator*() const { return *N; }
};
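A sketch (not part of the patch) of how the conversion operators above keep
old call sites working; SomeMDType is assumed to be a valid MDType*, and
MDType derives from MDScope in the new metadata hierarchy:

    DIType T(SomeMDType);
    MDType *Raw = T;        // via operator MDType*()
    DIScope Scope = T;      // via operator DIScope()
    DIDescriptor Desc = T;  // via operator DIDescriptor()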
-template <typename T> class DIRef;
-typedef DIRef<DIDescriptor> DIDescriptorRef;
-typedef DIRef<DIScope> DIScopeRef;
-typedef DIRef<DIType> DITypeRef;
-typedef DITypedArray<DITypeRef> DITypeArray;
+class DIBasicType {
+ MDBasicType *N;
-/// \brief A base class for various scopes.
-///
-/// Although, implementation-wise, DIScope is the parent class of most
-/// other DIxxx classes, including DIType and its descendants, most of
-/// DIScope's descendants are not a substitutable subtype of
-/// DIScope. The DIDescriptor::isScope() method only is true for
-/// DIScopes that are scopes in the strict lexical scope sense
-/// (DICompileUnit, DISubprogram, etc.), but not for, e.g., a DIType.
-class DIScope : public DIDescriptor {
public:
- explicit DIScope(const MDNode *N = nullptr) : DIDescriptor(N) {}
- DIScope(const MDScope *N) : DIDescriptor(N) {}
-
- MDScope *get() const { return cast_or_null<MDScope>(DIDescriptor::get()); }
- operator MDScope *() const { return get(); }
- MDScope *operator->() const { return get(); }
- MDScope &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- /// \brief Get the parent scope.
- ///
- /// Gets the parent scope for this scope node or returns a default
- /// constructed scope.
- DIScopeRef getContext() const;
- /// \brief Get the scope name.
- ///
- /// If the scope node has a name, return that, else return an empty string.
- StringRef getName() const;
- StringRef getFilename() const;
- StringRef getDirectory() const;
-
- /// \brief Generate a reference to this DIScope.
- ///
- /// Uses the type identifier instead of the actual MDNode if possible, to
- /// help type uniquing.
- DIScopeRef getRef() const;
+ DIBasicType(const MDBasicType *N = nullptr)
+ : N(const_cast<MDBasicType *>(N)) {}
+
+ operator DIDescriptor() const { return N; }
+ operator DIType() const { return N; }
+ operator MDBasicType *() const { return N; }
+ MDBasicType *operator->() const { return N; }
+ MDBasicType &operator*() const { return *N; }
};
-/// \brief Represents reference to a DIDescriptor.
-///
-/// Abstracts over direct and identifier-based metadata references.
-template <typename T> class DIRef {
- template <typename DescTy>
- friend DescTy DIDescriptor::getFieldAs(unsigned Elt) const;
- friend DIScopeRef DIScope::getContext() const;
- friend DIScopeRef DIScope::getRef() const;
- friend class DIType;
-
- /// \brief Val can be either a MDNode or a MDString.
- ///
- /// In the latter, MDString specifies the type identifier.
- const Metadata *Val;
- explicit DIRef(const Metadata *V);
+class DIDerivedType {
+ MDDerivedTypeBase *N;
public:
- T resolve(const DITypeIdentifierMap &Map) const;
- StringRef getName() const;
- operator Metadata *() const { return const_cast<Metadata *>(Val); }
-
- static DIRef get(const Metadata *MD) { return DIRef(MD); }
+ DIDerivedType(const MDDerivedTypeBase *N = nullptr)
+ : N(const_cast<MDDerivedTypeBase *>(N)) {}
+
+ operator DIDescriptor() const { return N; }
+ operator DIType() const { return N; }
+ operator MDDerivedTypeBase *() const { return N; }
+ MDDerivedTypeBase *operator->() const { return N; }
+ MDDerivedTypeBase &operator*() const { return *N; }
};
-template <typename T>
-T DIRef<T>::resolve(const DITypeIdentifierMap &Map) const {
- if (!Val)
- return T();
-
- if (const MDNode *MD = dyn_cast<MDNode>(Val))
- return T(MD);
-
- const MDString *MS = cast<MDString>(Val);
- // Find the corresponding MDNode.
- DITypeIdentifierMap::const_iterator Iter = Map.find(MS);
- assert(Iter != Map.end() && "Identifier not in the type map?");
- assert(DIDescriptor(Iter->second).isType() &&
- "MDNode in DITypeIdentifierMap should be a DIType.");
- return T(Iter->second);
-}
-
-template <typename T> StringRef DIRef<T>::getName() const {
- if (!Val)
- return StringRef();
-
- if (const MDNode *MD = dyn_cast<MDNode>(Val))
- return T(MD).getName();
-
- const MDString *MS = cast<MDString>(Val);
- return MS->getString();
-}
-
-/// \brief Handle fields that are references to DIDescriptors.
-template <>
-DIDescriptorRef DIDescriptor::getFieldAs<DIDescriptorRef>(unsigned Elt) const;
-/// \brief Specialize DIRef constructor for DIDescriptorRef.
-template <> DIRef<DIDescriptor>::DIRef(const Metadata *V);
-
-/// \brief Handle fields that are references to DIScopes.
-template <> DIScopeRef DIDescriptor::getFieldAs<DIScopeRef>(unsigned Elt) const;
-/// \brief Specialize DIRef constructor for DIScopeRef.
-template <> DIRef<DIScope>::DIRef(const Metadata *V);
-
-/// \brief Handle fields that are references to DITypes.
-template <> DITypeRef DIDescriptor::getFieldAs<DITypeRef>(unsigned Elt) const;
-/// \brief Specialize DIRef constructor for DITypeRef.
-template <> DIRef<DIType>::DIRef(const Metadata *V);
-
-/// \brief This is a wrapper for a type.
-///
-/// FIXME: Types should be factored much better so that CV qualifiers and
-/// others do not require a huge and empty descriptor full of zeros.
-class DIType : public DIScope {
-public:
- explicit DIType(const MDNode *N = nullptr) : DIScope(N) {}
- DIType(const MDType *N) : DIScope(N) {}
-
- MDType *get() const { return cast_or_null<MDType>(DIDescriptor::get()); }
- operator MDType *() const { return get(); }
- MDType *operator->() const { return get(); }
- MDType &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- operator DITypeRef() const {
- assert(isType() &&
- "constructing DITypeRef from an MDNode that is not a type");
- return DITypeRef(&*getRef());
- }
-
- bool Verify() const;
+class DICompositeType {
+ MDCompositeTypeBase *N;
- DIScopeRef getContext() const {
- RETURN_REF_FROM_RAW(DIScopeRef, N->getScope());
- }
- StringRef getName() const { RETURN_FROM_RAW(N->getName(), ""); }
- unsigned getLineNumber() const { RETURN_FROM_RAW(N->getLine(), 0); }
- uint64_t getSizeInBits() const { RETURN_FROM_RAW(N->getSizeInBits(), 0); }
- uint64_t getAlignInBits() const { RETURN_FROM_RAW(N->getAlignInBits(), 0); }
- // FIXME: Offset is only used for DW_TAG_member nodes. Making every type
- // carry this is just plain insane.
- uint64_t getOffsetInBits() const { RETURN_FROM_RAW(N->getOffsetInBits(), 0); }
- unsigned getFlags() const { RETURN_FROM_RAW(N->getFlags(), 0); }
- bool isPrivate() const {
- return (getFlags() & FlagAccessibility) == FlagPrivate;
- }
- bool isProtected() const {
- return (getFlags() & FlagAccessibility) == FlagProtected;
- }
- bool isPublic() const {
- return (getFlags() & FlagAccessibility) == FlagPublic;
- }
- bool isForwardDecl() const { return (getFlags() & FlagFwdDecl) != 0; }
- bool isAppleBlockExtension() const {
- return (getFlags() & FlagAppleBlock) != 0;
- }
- bool isBlockByrefStruct() const {
- return (getFlags() & FlagBlockByrefStruct) != 0;
- }
- bool isVirtual() const { return (getFlags() & FlagVirtual) != 0; }
- bool isArtificial() const { return (getFlags() & FlagArtificial) != 0; }
- bool isObjectPointer() const { return (getFlags() & FlagObjectPointer) != 0; }
- bool isObjcClassComplete() const {
- return (getFlags() & FlagObjcClassComplete) != 0;
- }
- bool isVector() const { return (getFlags() & FlagVector) != 0; }
- bool isStaticMember() const { return (getFlags() & FlagStaticMember) != 0; }
- bool isLValueReference() const {
- return (getFlags() & FlagLValueReference) != 0;
- }
- bool isRValueReference() const {
- return (getFlags() & FlagRValueReference) != 0;
- }
- bool isValid() const { return DbgNode && isType(); }
-};
-
-/// \brief A basic type, like 'int' or 'float'.
-class DIBasicType : public DIType {
public:
- explicit DIBasicType(const MDNode *N = nullptr) : DIType(N) {}
- DIBasicType(const MDBasicType *N) : DIType(N) {}
-
- MDBasicType *get() const {
- return cast_or_null<MDBasicType>(DIDescriptor::get());
- }
- operator MDBasicType *() const { return get(); }
- MDBasicType *operator->() const { return get(); }
- MDBasicType &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- unsigned getEncoding() const { RETURN_FROM_RAW(N->getEncoding(), 0); }
-
- bool Verify() const;
+ DICompositeType(const MDCompositeTypeBase *N = nullptr)
+ : N(const_cast<MDCompositeTypeBase *>(N)) {}
+
+ operator DIDescriptor() const { return N; }
+ operator DIType() const { return N; }
+ operator MDCompositeTypeBase *() const { return N; }
+ MDCompositeTypeBase *operator->() const { return N; }
+ MDCompositeTypeBase &operator*() const { return *N; }
};
-/// \brief A simple derived type
-///
-/// Like a const qualified type, a typedef, a pointer or reference, et cetera.
-/// Or, a data member of a class/struct/union.
-class DIDerivedType : public DIType {
-public:
- explicit DIDerivedType(const MDNode *N = nullptr) : DIType(N) {}
- DIDerivedType(const MDDerivedTypeBase *N) : DIType(N) {}
-
- MDDerivedTypeBase *get() const {
- return cast_or_null<MDDerivedTypeBase>(DIDescriptor::get());
- }
- operator MDDerivedTypeBase *() const { return get(); }
- MDDerivedTypeBase *operator->() const { return get(); }
- MDDerivedTypeBase &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- DITypeRef getTypeDerivedFrom() const {
- RETURN_REF_FROM_RAW(DITypeRef, N->getBaseType());
- }
-
- /// \brief Return property node, if this ivar is associated with one.
- MDNode *getObjCProperty() const {
- if (auto *N = dyn_cast<MDDerivedType>(get()))
- return dyn_cast_or_null<MDNode>(N->getExtraData());
- return nullptr;
- }
+class DISubroutineType {
+ MDSubroutineType *N;
- DITypeRef getClassType() const {
- assert(getTag() == dwarf::DW_TAG_ptr_to_member_type);
- if (auto *N = dyn_cast<MDDerivedType>(get()))
- return DITypeRef::get(N->getExtraData());
- return DITypeRef::get(nullptr);
- }
-
- Constant *getConstant() const {
- assert((getTag() == dwarf::DW_TAG_member) && isStaticMember());
- if (auto *N = dyn_cast<MDDerivedType>(get()))
- if (auto *C = dyn_cast_or_null<ConstantAsMetadata>(N->getExtraData()))
- return C->getValue();
-
- return nullptr;
- }
-
- bool Verify() const;
+public:
+ DISubroutineType(const MDSubroutineType *N = nullptr)
+ : N(const_cast<MDSubroutineType *>(N)) {}
+
+ operator DIDescriptor() const { return N; }
+ operator DIType() const { return N; }
+ operator DICompositeType() const { return N; }
+ operator MDSubroutineType *() const { return N; }
+ MDSubroutineType *operator->() const { return N; }
+ MDSubroutineType &operator*() const { return *N; }
};
-/// \brief Types that refer to multiple other types.
-///
-/// This descriptor holds a type that can refer to multiple other types, like a
-/// function or struct.
-///
-/// DICompositeType is derived from DIDerivedType because some
-/// composite types (such as enums) can be derived from basic types
-// FIXME: Make this derive from DIType directly & just store the
-// base type in a single DIType field.
-class DICompositeType : public DIDerivedType {
- friend class DIBuilder;
-
- /// \brief Set the array of member DITypes.
- void setArraysHelper(MDNode *Elements, MDNode *TParams);
+class DIFile {
+ MDFile *N;
public:
- explicit DICompositeType(const MDNode *N = nullptr) : DIDerivedType(N) {}
- DICompositeType(const MDCompositeTypeBase *N) : DIDerivedType(N) {}
+ DIFile(const MDFile *N = nullptr) : N(const_cast<MDFile *>(N)) {}
- MDCompositeTypeBase *get() const {
- return cast_or_null<MDCompositeTypeBase>(DIDescriptor::get());
- }
- operator MDCompositeTypeBase *() const { return get(); }
- MDCompositeTypeBase *operator->() const { return get(); }
- MDCompositeTypeBase &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- DIArray getElements() const {
- assert(!isSubroutineType() && "no elements for DISubroutineType");
- RETURN_DESCRIPTOR_FROM_RAW(DIArray, N->getElements());
- }
+ operator DIDescriptor() const { return N; }
+ operator DIScope() const { return N; }
+ operator MDFile *() const { return N; }
+ MDFile *operator->() const { return N; }
+ MDFile &operator*() const { return *N; }
+};
-private:
- template <typename T>
- void setArrays(DITypedArray<T> Elements, DIArray TParams = DIArray()) {
- assert(
- (!TParams || DbgNode->getNumOperands() == 8) &&
- "If you're setting the template parameters this should include a slot "
- "for that!");
- setArraysHelper(Elements, TParams);
- }
+class DICompileUnit {
+ MDCompileUnit *N;
public:
- unsigned getRunTimeLang() const { RETURN_FROM_RAW(N->getRuntimeLang(), 0); }
- DITypeRef getContainingType() const {
- RETURN_REF_FROM_RAW(DITypeRef, N->getVTableHolder());
- }
+ DICompileUnit(const MDCompileUnit *N = nullptr)
+ : N(const_cast<MDCompileUnit *>(N)) {}
+
+ operator DIDescriptor() const { return N; }
+ operator DIScope() const { return N; }
+ operator MDCompileUnit *() const { return N; }
+ MDCompileUnit *operator->() const { return N; }
+ MDCompileUnit &operator*() const { return *N; }
+};
-private:
- /// \brief Set the containing type.
- void setContainingType(DICompositeType ContainingType);
+class DISubprogram {
+ MDSubprogram *N;
public:
- DIArray getTemplateParams() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIArray, N->getTemplateParams());
- }
- MDString *getIdentifier() const {
- RETURN_FROM_RAW(N->getRawIdentifier(), nullptr);
- }
-
- bool Verify() const;
+ DISubprogram(const MDSubprogram *N = nullptr)
+ : N(const_cast<MDSubprogram *>(N)) {}
+
+ operator DIDescriptor() const { return N; }
+ operator DIScope() const { return N; }
+ operator MDSubprogram *() const { return N; }
+ MDSubprogram *operator->() const { return N; }
+ MDSubprogram &operator*() const { return *N; }
};
-class DISubroutineType : public DICompositeType {
-public:
- explicit DISubroutineType(const MDNode *N = nullptr) : DICompositeType(N) {}
- DISubroutineType(const MDSubroutineType *N) : DICompositeType(N) {}
+class DILexicalBlock {
+ MDLexicalBlockBase *N;
- MDSubroutineType *get() const {
- return cast_or_null<MDSubroutineType>(DIDescriptor::get());
- }
- operator MDSubroutineType *() const { return get(); }
- MDSubroutineType *operator->() const { return get(); }
- MDSubroutineType &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- DITypedArray<DITypeRef> getTypeArray() const {
- RETURN_DESCRIPTOR_FROM_RAW(DITypedArray<DITypeRef>, N->getTypeArray());
- }
-};
-
-/// \brief This is a wrapper for a file.
-class DIFile : public DIScope {
public:
- explicit DIFile(const MDNode *N = nullptr) : DIScope(N) {}
- DIFile(const MDFile *N) : DIScope(N) {}
-
- MDFile *get() const { return cast_or_null<MDFile>(DIDescriptor::get()); }
- operator MDFile *() const { return get(); }
- MDFile *operator->() const { return get(); }
- MDFile &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
+ DILexicalBlock(const MDLexicalBlockBase *N = nullptr)
+ : N(const_cast<MDLexicalBlockBase *>(N)) {}
- /// \brief Retrieve the MDNode for the directory/file pair.
- MDNode *getFileNode() const { return get(); }
- bool Verify() const;
+ operator DIDescriptor() const { return N; }
+ operator MDLexicalBlockBase *() const { return N; }
+ MDLexicalBlockBase *operator->() const { return N; }
+ MDLexicalBlockBase &operator*() const { return *N; }
};
-/// \brief A wrapper for a compile unit.
-class DICompileUnit : public DIScope {
-public:
- explicit DICompileUnit(const MDNode *N = nullptr) : DIScope(N) {}
- DICompileUnit(const MDCompileUnit *N) : DIScope(N) {}
-
- MDCompileUnit *get() const {
- return cast_or_null<MDCompileUnit>(DIDescriptor::get());
- }
- operator MDCompileUnit *() const { return get(); }
- MDCompileUnit *operator->() const { return get(); }
- MDCompileUnit &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- dwarf::SourceLanguage getLanguage() const {
- RETURN_FROM_RAW(static_cast<dwarf::SourceLanguage>(N->getSourceLanguage()),
- static_cast<dwarf::SourceLanguage>(0));
- }
- StringRef getProducer() const { RETURN_FROM_RAW(N->getProducer(), ""); }
- bool isOptimized() const { RETURN_FROM_RAW(N->isOptimized(), false); }
- StringRef getFlags() const { RETURN_FROM_RAW(N->getFlags(), ""); }
- unsigned getRunTimeVersion() const {
- RETURN_FROM_RAW(N->getRuntimeVersion(), 0);
- }
+class DILexicalBlockFile {
+ MDLexicalBlockFile *N;
- DIArray getEnumTypes() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIArray, N->getEnumTypes());
- }
- DIArray getRetainedTypes() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIArray, N->getRetainedTypes());
- }
- DIArray getSubprograms() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIArray, N->getSubprograms());
- }
- DIArray getGlobalVariables() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIArray, N->getGlobalVariables());
- }
- DIArray getImportedEntities() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIArray, N->getImportedEntities());
- }
-
- void replaceSubprograms(DIArray Subprograms);
- void replaceGlobalVariables(DIArray GlobalVariables);
-
- StringRef getSplitDebugFilename() const {
- RETURN_FROM_RAW(N->getSplitDebugFilename(), "");
- }
- unsigned getEmissionKind() const { RETURN_FROM_RAW(N->getEmissionKind(), 0); }
-
- bool Verify() const;
-};
-
-/// \brief This is a wrapper for a subprogram (e.g. a function).
-class DISubprogram : public DIScope {
public:
- explicit DISubprogram(const MDNode *N = nullptr) : DIScope(N) {}
- DISubprogram(const MDSubprogram *N) : DIScope(N) {}
-
- MDSubprogram *get() const {
- return cast_or_null<MDSubprogram>(DIDescriptor::get());
- }
- operator MDSubprogram *() const { return get(); }
- MDSubprogram *operator->() const { return get(); }
- MDSubprogram &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- StringRef getName() const { RETURN_FROM_RAW(N->getName(), ""); }
- StringRef getDisplayName() const { RETURN_FROM_RAW(N->getDisplayName(), ""); }
- StringRef getLinkageName() const { RETURN_FROM_RAW(N->getLinkageName(), ""); }
- unsigned getLineNumber() const { RETURN_FROM_RAW(N->getLine(), 0); }
-
- /// \brief Check if this is local (like 'static' in C).
- unsigned isLocalToUnit() const { RETURN_FROM_RAW(N->isLocalToUnit(), 0); }
- unsigned isDefinition() const { RETURN_FROM_RAW(N->isDefinition(), 0); }
-
- unsigned getVirtuality() const { RETURN_FROM_RAW(N->getVirtuality(), 0); }
- unsigned getVirtualIndex() const { RETURN_FROM_RAW(N->getVirtualIndex(), 0); }
-
- unsigned getFlags() const { RETURN_FROM_RAW(N->getFlags(), 0); }
-
- unsigned isOptimized() const { RETURN_FROM_RAW(N->isOptimized(), 0); }
-
- /// \brief Get the beginning of the scope of the function (not the name).
- unsigned getScopeLineNumber() const { RETURN_FROM_RAW(N->getScopeLine(), 0); }
-
- DIScopeRef getContext() const {
- RETURN_REF_FROM_RAW(DIScopeRef, N->getScope());
- }
- DISubroutineType getType() const {
- RETURN_DESCRIPTOR_FROM_RAW(DISubroutineType, N->getType());
- }
-
- DITypeRef getContainingType() const {
- RETURN_REF_FROM_RAW(DITypeRef, N->getContainingType());
- }
-
- bool Verify() const;
-
- /// \brief Check if this provides debugging information for the function F.
- bool describes(const Function *F);
-
- Function *getFunction() const;
-
- void replaceFunction(Function *F) {
- if (auto *N = get())
- N->replaceFunction(F);
- }
- DIArray getTemplateParams() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIArray, N->getTemplateParams());
- }
- DISubprogram getFunctionDeclaration() const {
- RETURN_DESCRIPTOR_FROM_RAW(DISubprogram, N->getDeclaration());
- }
- MDNode *getVariablesNodes() const { return getVariables(); }
- DIArray getVariables() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIArray, N->getVariables());
- }
-
- unsigned isArtificial() const { return (getFlags() & FlagArtificial) != 0; }
- /// \brief Check for the "private" access specifier.
- bool isPrivate() const {
- return (getFlags() & FlagAccessibility) == FlagPrivate;
- }
- /// \brief Check for the "protected" access specifier.
- bool isProtected() const {
- return (getFlags() & FlagAccessibility) == FlagProtected;
- }
- /// \brief Check for the "public" access specifier.
- bool isPublic() const {
- return (getFlags() & FlagAccessibility) == FlagPublic;
- }
- /// \brief Check for "explicit".
- bool isExplicit() const { return (getFlags() & FlagExplicit) != 0; }
- /// \brief Check if this is prototyped.
- bool isPrototyped() const { return (getFlags() & FlagPrototyped) != 0; }
-
- /// \brief Check if this is reference-qualified.
- ///
- /// Return true if this subprogram is a C++11 reference-qualified non-static
- /// member function (void foo() &).
- unsigned isLValueReference() const {
- return (getFlags() & FlagLValueReference) != 0;
- }
+ DILexicalBlockFile(const MDLexicalBlockFile *N = nullptr)
+ : N(const_cast<MDLexicalBlockFile *>(N)) {}
- /// \brief Check if this is rvalue-reference-qualified.
- ///
- /// Return true if this subprogram is a C++11 rvalue-reference-qualified
- /// non-static member function (void foo() &&).
- unsigned isRValueReference() const {
- return (getFlags() & FlagRValueReference) != 0;
- }
+ operator DIDescriptor() const { return N; }
+ operator MDLexicalBlockFile *() const { return N; }
+ MDLexicalBlockFile *operator->() const { return N; }
+ MDLexicalBlockFile &operator*() const { return *N; }
};
-/// \brief This is a wrapper for a lexical block.
-class DILexicalBlock : public DIScope {
-public:
- explicit DILexicalBlock(const MDNode *N = nullptr) : DIScope(N) {}
- DILexicalBlock(const MDLexicalBlock *N) : DIScope(N) {}
+class DINameSpace {
+ MDNamespace *N;
- MDLexicalBlockBase *get() const {
- return cast_or_null<MDLexicalBlockBase>(DIDescriptor::get());
- }
- operator MDLexicalBlockBase *() const { return get(); }
- MDLexicalBlockBase *operator->() const { return get(); }
- MDLexicalBlockBase &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- DIScope getContext() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIScope, N->getScope());
- }
- unsigned getLineNumber() const {
- if (auto *N = dyn_cast<MDLexicalBlock>(get()))
- return N->getLine();
- return 0;
- }
- unsigned getColumnNumber() const {
- if (auto *N = dyn_cast<MDLexicalBlock>(get()))
- return N->getColumn();
- return 0;
- }
- bool Verify() const;
-};
-
-/// \brief This is a wrapper for a lexical block with a filename change.
-class DILexicalBlockFile : public DIScope {
public:
- explicit DILexicalBlockFile(const MDNode *N = nullptr) : DIScope(N) {}
- DILexicalBlockFile(const MDLexicalBlockFile *N) : DIScope(N) {}
-
- MDLexicalBlockFile *get() const {
- return cast_or_null<MDLexicalBlockFile>(DIDescriptor::get());
- }
- operator MDLexicalBlockFile *() const { return get(); }
- MDLexicalBlockFile *operator->() const { return get(); }
- MDLexicalBlockFile &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- DIScope getContext() const {
- // FIXME: This logic is horrible. getScope() returns a DILexicalBlock, but
- // then we check if it's a subprogram? WHAT?!?
- if (getScope().isSubprogram())
- return getScope();
- return getScope().getContext();
- }
- unsigned getLineNumber() const { return getScope().getLineNumber(); }
- unsigned getColumnNumber() const { return getScope().getColumnNumber(); }
- DILexicalBlock getScope() const {
- RETURN_DESCRIPTOR_FROM_RAW(DILexicalBlock, N->getScope());
- }
- unsigned getDiscriminator() const {
- RETURN_FROM_RAW(N->getDiscriminator(), 0);
- }
- bool Verify() const;
+ DINameSpace(const MDNamespace *N = nullptr)
+ : N(const_cast<MDNamespace *>(N)) {}
+
+ operator DIDescriptor() const { return N; }
+ operator DIScope() const { return N; }
+ operator MDNamespace *() const { return N; }
+ MDNamespace *operator->() const { return N; }
+ MDNamespace &operator*() const { return *N; }
};
-/// \brief A wrapper for a C++ style name space.
-class DINameSpace : public DIScope {
-public:
- explicit DINameSpace(const MDNode *N = nullptr) : DIScope(N) {}
- DINameSpace(const MDNamespace *N) : DIScope(N) {}
-
- MDNamespace *get() const {
- return cast_or_null<MDNamespace>(DIDescriptor::get());
- }
- operator MDNamespace *() const { return get(); }
- MDNamespace *operator->() const { return get(); }
- MDNamespace &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- StringRef getName() const { RETURN_FROM_RAW(N->getName(), ""); }
- unsigned getLineNumber() const { RETURN_FROM_RAW(N->getLine(), 0); }
- DIScope getContext() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIScope, N->getScope());
- }
- bool Verify() const;
-};
+class DITemplateTypeParameter {
+ MDTemplateTypeParameter *N;
-/// \brief This is a wrapper for template type parameter.
-class DITemplateTypeParameter : public DIDescriptor {
public:
- explicit DITemplateTypeParameter(const MDNode *N = nullptr)
- : DIDescriptor(N) {}
- DITemplateTypeParameter(const MDTemplateTypeParameter *N) : DIDescriptor(N) {}
+ DITemplateTypeParameter(const MDTemplateTypeParameter *N = nullptr)
+ : N(const_cast<MDTemplateTypeParameter *>(N)) {}
- MDTemplateTypeParameter *get() const {
- return cast_or_null<MDTemplateTypeParameter>(DIDescriptor::get());
- }
- operator MDTemplateTypeParameter *() const { return get(); }
- MDTemplateTypeParameter *operator->() const { return get(); }
- MDTemplateTypeParameter &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- StringRef getName() const { RETURN_FROM_RAW(N->getName(), ""); }
-
- DITypeRef getType() const { RETURN_REF_FROM_RAW(DITypeRef, N->getType()); }
- bool Verify() const;
+ operator MDTemplateTypeParameter *() const { return N; }
+ MDTemplateTypeParameter *operator->() const { return N; }
+ MDTemplateTypeParameter &operator*() const { return *N; }
};
-/// \brief This is a wrapper for template value parameter.
-class DITemplateValueParameter : public DIDescriptor {
-public:
- explicit DITemplateValueParameter(const MDNode *N = nullptr)
- : DIDescriptor(N) {}
- DITemplateValueParameter(const MDTemplateValueParameter *N)
- : DIDescriptor(N) {}
+class DITemplateValueParameter {
+ MDTemplateValueParameter *N;
- MDTemplateValueParameter *get() const {
- return cast_or_null<MDTemplateValueParameter>(DIDescriptor::get());
- }
- operator MDTemplateValueParameter *() const { return get(); }
- MDTemplateValueParameter *operator->() const { return get(); }
- MDTemplateValueParameter &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
+public:
+ DITemplateValueParameter(const MDTemplateValueParameter *N = nullptr)
+ : N(const_cast<MDTemplateValueParameter *>(N)) {}
- StringRef getName() const { RETURN_FROM_RAW(N->getName(), ""); }
- DITypeRef getType() const { RETURN_REF_FROM_RAW(DITypeRef, N->getType()); }
- Metadata *getValue() const { RETURN_FROM_RAW(N->getValue(), nullptr); }
- bool Verify() const;
+ operator MDTemplateValueParameter *() const { return N; }
+ MDTemplateValueParameter *operator->() const { return N; }
+ MDTemplateValueParameter &operator*() const { return *N; }
};
-/// \brief This is a wrapper for a global variable.
-class DIGlobalVariable : public DIDescriptor {
- DIFile getFile() const { RETURN_DESCRIPTOR_FROM_RAW(DIFile, N->getFile()); }
+class DIGlobalVariable {
+ MDGlobalVariable *N;
public:
- explicit DIGlobalVariable(const MDNode *N = nullptr) : DIDescriptor(N) {}
- DIGlobalVariable(const MDGlobalVariable *N) : DIDescriptor(N) {}
+ DIGlobalVariable(const MDGlobalVariable *N = nullptr)
+ : N(const_cast<MDGlobalVariable *>(N)) {}
- MDGlobalVariable *get() const {
- return cast_or_null<MDGlobalVariable>(DIDescriptor::get());
- }
- operator MDGlobalVariable *() const { return get(); }
- MDGlobalVariable *operator->() const { return get(); }
- MDGlobalVariable &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- StringRef getName() const { RETURN_FROM_RAW(N->getName(), ""); }
- StringRef getDisplayName() const { RETURN_FROM_RAW(N->getDisplayName(), ""); }
- StringRef getLinkageName() const { RETURN_FROM_RAW(N->getLinkageName(), ""); }
- unsigned getLineNumber() const { RETURN_FROM_RAW(N->getLine(), 0); }
- unsigned isLocalToUnit() const { RETURN_FROM_RAW(N->isLocalToUnit(), 0); }
- unsigned isDefinition() const { RETURN_FROM_RAW(N->isDefinition(), 0); }
-
- DIScope getContext() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIScope, N->getScope());
- }
- StringRef getFilename() const { return getFile().getFilename(); }
- StringRef getDirectory() const { return getFile().getDirectory(); }
- DITypeRef getType() const { RETURN_REF_FROM_RAW(DITypeRef, N->getType()); }
-
- GlobalVariable *getGlobal() const;
- Constant *getConstant() const {
- if (auto *N = get())
- if (auto *C = dyn_cast_or_null<ConstantAsMetadata>(N->getVariable()))
- return C->getValue();
- return nullptr;
- }
- DIDerivedType getStaticDataMemberDeclaration() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIDerivedType,
- N->getStaticDataMemberDeclaration());
- }
-
- bool Verify() const;
+ operator DIDescriptor() const { return N; }
+ operator MDGlobalVariable *() const { return N; }
+ MDGlobalVariable *operator->() const { return N; }
+ MDGlobalVariable &operator*() const { return *N; }
};
-/// \brief This is a wrapper for a variable (e.g. parameter, local, global etc).
-class DIVariable : public DIDescriptor {
- unsigned getFlags() const { RETURN_FROM_RAW(N->getFlags(), 0); }
+class DIVariable {
+ MDLocalVariable *N;
public:
- explicit DIVariable(const MDNode *N = nullptr) : DIDescriptor(N) {}
- DIVariable(const MDLocalVariable *N) : DIDescriptor(N) {}
+ DIVariable(const MDLocalVariable *N = nullptr)
+ : N(const_cast<MDLocalVariable *>(N)) {}
- MDLocalVariable *get() const {
- return cast_or_null<MDLocalVariable>(DIDescriptor::get());
- }
- operator MDLocalVariable *() const { return get(); }
- MDLocalVariable *operator->() const { return get(); }
- MDLocalVariable &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- StringRef getName() const { RETURN_FROM_RAW(N->getName(), ""); }
- unsigned getLineNumber() const { RETURN_FROM_RAW(N->getLine(), 0); }
- unsigned getArgNumber() const { RETURN_FROM_RAW(N->getArg(), 0); }
-
- DIScope getContext() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIScope, N->getScope());
- }
- DIFile getFile() const { RETURN_DESCRIPTOR_FROM_RAW(DIFile, N->getFile()); }
- DITypeRef getType() const { RETURN_REF_FROM_RAW(DITypeRef, N->getType()); }
-
- /// \brief Return true if this variable is marked as "artificial".
- bool isArtificial() const {
- return (getFlags() & FlagArtificial) != 0;
- }
-
- bool isObjectPointer() const {
- return (getFlags() & FlagObjectPointer) != 0;
- }
-
- /// \brief If this variable is inlined then return inline location.
- MDNode *getInlinedAt() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIDescriptor, N->getInlinedAt());
- }
-
- bool Verify() const;
-
- /// \brief Check if this is a "__block" variable (Apple Blocks).
- bool isBlockByrefVariable(const DITypeIdentifierMap &Map) const {
- return (getType().resolve(Map)).isBlockByrefStruct();
- }
-
- /// \brief Check if this is an inlined function argument.
- bool isInlinedFnArgument(const Function *CurFn);
-
- /// \brief Return the size reported by the variable's type.
- unsigned getSizeInBits(const DITypeIdentifierMap &Map);
-
- void printExtendedName(raw_ostream &OS) const;
+ operator MDLocalVariable *() const { return N; }
+ MDLocalVariable *operator->() const { return N; }
+ MDLocalVariable &operator*() const { return *N; }
};
-/// \brief A complex location expression in postfix notation.
-///
-/// This is (almost) a DWARF expression that modifies the location of a
-/// variable or (or the location of a single piece of a variable).
-///
-/// FIXME: Instead of DW_OP_plus taking an argument, this should use DW_OP_const
-/// and have DW_OP_plus consume the topmost elements on the stack.
-class DIExpression : public DIDescriptor {
-public:
- explicit DIExpression(const MDNode *N = nullptr) : DIDescriptor(N) {}
- DIExpression(const MDExpression *N) : DIDescriptor(N) {}
-
- MDExpression *get() const {
- return cast_or_null<MDExpression>(DIDescriptor::get());
- }
- operator MDExpression *() const { return get(); }
- MDExpression *operator->() const { return get(); }
- MDExpression &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- // Don't call this. Call isValid() directly.
- bool Verify() const = delete;
-
- /// \brief Return the number of elements in the complex expression.
- unsigned getNumElements() const { return get()->getNumElements(); }
-
- /// \brief return the Idx'th complex address element.
- uint64_t getElement(unsigned I) const { return get()->getElement(I); }
-
- /// \brief Return whether this is a piece of an aggregate variable.
- bool isBitPiece() const;
- /// \brief Return the offset of this piece in bits.
- uint64_t getBitPieceOffset() const;
- /// \brief Return the size of this piece in bits.
- uint64_t getBitPieceSize() const;
-
- class iterator;
- /// \brief A lightweight wrapper around an element of a DIExpression.
- class Operand {
- friend class iterator;
- MDExpression::element_iterator I;
- Operand() {}
- Operand(MDExpression::element_iterator I) : I(I) {}
- public:
- /// \brief Operands such as DW_OP_piece have explicit (non-stack) arguments.
- /// Argument 0 is the operand itself.
- uint64_t getArg(unsigned N) const {
- MDExpression::element_iterator In = I;
- std::advance(In, N);
- return *In;
- }
- operator uint64_t () const { return *I; }
- /// \brief Returns underlying MDExpression::element_iterator.
- const MDExpression::element_iterator &getBase() const { return I; }
- /// \brief Returns the next operand.
- iterator getNext() const;
- };
-
- /// \brief An iterator for DIExpression elements.
- class iterator : public std::iterator<std::input_iterator_tag, StringRef,
- unsigned, const Operand*, Operand> {
- friend class Operand;
- MDExpression::element_iterator I;
- Operand Tmp;
-
- public:
- iterator(MDExpression::element_iterator I) : I(I) {}
- const Operand &operator*() { return Tmp = Operand(I); }
- const Operand *operator->() { return &(Tmp = Operand(I)); }
- iterator &operator++() {
- increment();
- return *this;
- }
- iterator operator++(int) {
- iterator X(*this);
- increment();
- return X;
- }
- bool operator==(const iterator &X) const { return I == X.I; }
- bool operator!=(const iterator &X) const { return !(*this == X); }
-
- private:
- void increment() {
- switch (**this) {
- case dwarf::DW_OP_bit_piece: std::advance(I, 3); break;
- case dwarf::DW_OP_plus: std::advance(I, 2); break;
- case dwarf::DW_OP_deref: std::advance(I, 1); break;
- default:
- llvm_unreachable("unsupported operand");
- }
- }
- };
-
- iterator begin() const { return get()->elements_begin(); }
- iterator end() const { return get()->elements_end(); }
-};
+class DIExpression {
+ MDExpression *N;
-/// \brief This object holds location information.
-///
-/// This object is not associated with any DWARF tag.
-class DILocation : public DIDescriptor {
public:
- explicit DILocation(const MDNode *N) : DIDescriptor(N) {}
-
- MDLocation *get() const {
- return cast_or_null<MDLocation>(DIDescriptor::get());
- }
- operator MDLocation *() const { return get(); }
- MDLocation *operator->() const { return get(); }
- MDLocation &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
-
- unsigned getLineNumber() const { RETURN_FROM_RAW(N->getLine(), 0); }
- unsigned getColumnNumber() const { RETURN_FROM_RAW(N->getColumn(), 0); }
- DIScope getScope() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIScope, N->getScope());
- }
- DILocation getOrigLocation() const {
- RETURN_DESCRIPTOR_FROM_RAW(DILocation, N->getInlinedAt());
- }
- StringRef getFilename() const { return getScope().getFilename(); }
- StringRef getDirectory() const { return getScope().getDirectory(); }
- bool Verify() const;
- bool atSameLineAs(const DILocation &Other) const {
- return (getLineNumber() == Other.getLineNumber() &&
- getFilename() == Other.getFilename());
- }
- /// \brief Get the DWARF discriminator.
- ///
- /// DWARF discriminators are used to distinguish identical file locations for
- /// instructions that are on different basic blocks. If two instructions are
- /// inside the same lexical block and are in different basic blocks, we
- /// create a new lexical block with identical location as the original but
- /// with a different discriminator value
- /// (lib/Transforms/Util/AddDiscriminators.cpp for details).
- unsigned getDiscriminator() const {
- // Since discriminators are associated with lexical blocks, make
- // sure this location is a lexical block before retrieving its
- // value.
- return getScope().isLexicalBlockFile()
- ? DILexicalBlockFile(
- cast<MDNode>(cast<MDLocation>(DbgNode)->getScope()))
- .getDiscriminator()
- : 0;
- }
+ DIExpression(const MDExpression *N = nullptr)
+ : N(const_cast<MDExpression *>(N)) {}
- /// \brief Generate a new discriminator value for this location.
- unsigned computeNewDiscriminator(LLVMContext &Ctx);
-
- /// \brief Return a copy of this location with a different scope.
- DILocation copyWithNewScope(LLVMContext &Ctx, DILexicalBlockFile NewScope);
+ operator MDExpression *() const { return N; }
+ MDExpression *operator->() const { return N; }
+ MDExpression &operator*() const { return *N; }
};
-class DIObjCProperty : public DIDescriptor {
+class DILocation {
+ MDLocation *N;
+
public:
- explicit DIObjCProperty(const MDNode *N) : DIDescriptor(N) {}
- DIObjCProperty(const MDObjCProperty *N) : DIDescriptor(N) {}
+ DILocation(const MDLocation *N = nullptr) : N(const_cast<MDLocation *>(N)) {}
- MDObjCProperty *get() const {
- return cast_or_null<MDObjCProperty>(DIDescriptor::get());
- }
- operator MDObjCProperty *() const { return get(); }
- MDObjCProperty *operator->() const { return get(); }
- MDObjCProperty &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
+ operator MDLocation *() const { return N; }
+ MDLocation *operator->() const { return N; }
+ MDLocation &operator*() const { return *N; }
+};
- StringRef getObjCPropertyName() const { RETURN_FROM_RAW(N->getName(), ""); }
- DIFile getFile() const { RETURN_DESCRIPTOR_FROM_RAW(DIFile, N->getFile()); }
- unsigned getLineNumber() const { RETURN_FROM_RAW(N->getLine(), 0); }
+class DIObjCProperty {
+ MDObjCProperty *N;
- StringRef getObjCPropertyGetterName() const {
- RETURN_FROM_RAW(N->getGetterName(), "");
- }
- StringRef getObjCPropertySetterName() const {
- RETURN_FROM_RAW(N->getSetterName(), "");
- }
- unsigned getAttributes() const { RETURN_FROM_RAW(N->getAttributes(), 0); }
- bool isReadOnlyObjCProperty() const {
- return (getAttributes() & dwarf::DW_APPLE_PROPERTY_readonly) != 0;
- }
- bool isReadWriteObjCProperty() const {
- return (getAttributes() & dwarf::DW_APPLE_PROPERTY_readwrite) != 0;
- }
- bool isAssignObjCProperty() const {
- return (getAttributes() & dwarf::DW_APPLE_PROPERTY_assign) != 0;
- }
- bool isRetainObjCProperty() const {
- return (getAttributes() & dwarf::DW_APPLE_PROPERTY_retain) != 0;
- }
- bool isCopyObjCProperty() const {
- return (getAttributes() & dwarf::DW_APPLE_PROPERTY_copy) != 0;
- }
- bool isNonAtomicObjCProperty() const {
- return (getAttributes() & dwarf::DW_APPLE_PROPERTY_nonatomic) != 0;
- }
-
- /// \brief Get the type.
- ///
- /// \note Objective-C doesn't have an ODR, so there is no benefit in storing
- /// the type as a DITypeRef here.
- DIType getType() const { RETURN_DESCRIPTOR_FROM_RAW(DIType, N->getType()); }
+public:
+ DIObjCProperty(const MDObjCProperty *N = nullptr)
+ : N(const_cast<MDObjCProperty *>(N)) {}
- bool Verify() const;
+ operator MDObjCProperty *() const { return N; }
+ MDObjCProperty *operator->() const { return N; }
+ MDObjCProperty &operator*() const { return *N; }
};
-/// \brief An imported module (C++ using directive or similar).
-class DIImportedEntity : public DIDescriptor {
-public:
- DIImportedEntity() = default;
- explicit DIImportedEntity(const MDNode *N) : DIDescriptor(N) {}
- DIImportedEntity(const MDImportedEntity *N) : DIDescriptor(N) {}
+class DIImportedEntity {
+ MDImportedEntity *N;
- MDImportedEntity *get() const {
- return cast_or_null<MDImportedEntity>(DIDescriptor::get());
- }
- operator MDImportedEntity *() const { return get(); }
- MDImportedEntity *operator->() const { return get(); }
- MDImportedEntity &operator*() const {
- assert(get() && "Expected valid pointer");
- return *get();
- }
+public:
+ DIImportedEntity(const MDImportedEntity *N = nullptr)
+ : N(const_cast<MDImportedEntity *>(N)) {}
- DIScope getContext() const {
- RETURN_DESCRIPTOR_FROM_RAW(DIScope, N->getScope());
- }
- DIDescriptorRef getEntity() const {
- RETURN_REF_FROM_RAW(DIDescriptorRef, N->getEntity());
- }
- unsigned getLineNumber() const { RETURN_FROM_RAW(N->getLine(), 0); }
- StringRef getName() const { RETURN_FROM_RAW(N->getName(), ""); }
- bool Verify() const;
+ operator DIDescriptor() const { return N; }
+ operator MDImportedEntity *() const { return N; }
+ MDImportedEntity *operator->() const { return N; }
+ MDImportedEntity &operator*() const { return *N; }
};
-#undef RETURN_FROM_RAW
-#undef RETURN_DESCRIPTOR_FROM_RAW
-#undef RETURN_REF_FROM_RAW
+#define SIMPLIFY_DESCRIPTOR(DESC) \
+ template <> struct simplify_type<const DESC> { \
+ typedef Metadata *SimpleType; \
+ static SimpleType getSimplifiedValue(const DESC &DI) { return DI; } \
+ }; \
+ template <> struct simplify_type<DESC> : simplify_type<const DESC> {};
+SIMPLIFY_DESCRIPTOR(DIDescriptor)
+SIMPLIFY_DESCRIPTOR(DISubrange)
+SIMPLIFY_DESCRIPTOR(DIEnumerator)
+SIMPLIFY_DESCRIPTOR(DIScope)
+SIMPLIFY_DESCRIPTOR(DIType)
+SIMPLIFY_DESCRIPTOR(DIBasicType)
+SIMPLIFY_DESCRIPTOR(DIDerivedType)
+SIMPLIFY_DESCRIPTOR(DICompositeType)
+SIMPLIFY_DESCRIPTOR(DISubroutineType)
+SIMPLIFY_DESCRIPTOR(DIFile)
+SIMPLIFY_DESCRIPTOR(DICompileUnit)
+SIMPLIFY_DESCRIPTOR(DISubprogram)
+SIMPLIFY_DESCRIPTOR(DILexicalBlock)
+SIMPLIFY_DESCRIPTOR(DILexicalBlockFile)
+SIMPLIFY_DESCRIPTOR(DINameSpace)
+SIMPLIFY_DESCRIPTOR(DITemplateTypeParameter)
+SIMPLIFY_DESCRIPTOR(DITemplateValueParameter)
+SIMPLIFY_DESCRIPTOR(DIGlobalVariable)
+SIMPLIFY_DESCRIPTOR(DIVariable)
+SIMPLIFY_DESCRIPTOR(DIExpression)
+SIMPLIFY_DESCRIPTOR(DILocation)
+SIMPLIFY_DESCRIPTOR(DIObjCProperty)
+SIMPLIFY_DESCRIPTOR(DIImportedEntity)
+#undef SIMPLIFY_DESCRIPTOR
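Editorial illustration, not part of the patch: the SIMPLIFY_DESCRIPTOR specializations above let the lightweight DI* wrappers decay to their underlying Metadata * inside LLVM's isa<>/dyn_cast<> machinery. A minimal sketch of what that enables, assuming the new DISubprogram wrapper converts to MDSubprogram * like the wrappers shown in this hunk; subprogramName is a hypothetical helper:

#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Hypothetical helper: simplify_type lets dyn_cast_or_null see through the
// wrapper to the underlying Metadata *, yielding the typed node or nullptr.
static StringRef subprogramName(DISubprogram SP) {
  if (auto *N = dyn_cast_or_null<MDSubprogram>(SP))
    return N->getName();
  return "";
}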
/// \brief Find subprogram that is enclosing this scope.
DISubprogram getDISubprogram(const MDNode *Scope);
@@ -1356,16 +429,6 @@ DISubprogram getDISubprogram(const Function *F);
/// \brief Find underlying composite type.
DICompositeType getDICompositeType(DIType T);
-/// \brief Create a new inlined variable based on current variable.
-///
-/// @param DV Current Variable.
-/// @param InlinedScope Location at current variable is inlined.
-DIVariable createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
- LLVMContext &VMContext);
-
-/// \brief Remove inlined scope from the variable.
-DIVariable cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext);
-
/// \brief Generate map by visiting all retained types.
DITypeIdentifierMap generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes);
@@ -1375,6 +438,7 @@ DITypeIdentifierMap generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes);
/// metadata for debugging. We also remove debug locations for instructions.
/// Return true if module is modified.
bool StripDebugInfo(Module &M);
+bool stripDebugInfo(Function &F);
/// \brief Return Debug Info Metadata Version by checking module flags.
unsigned getDebugMetadataVersionFromModule(const Module &M);
diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h
index d7563fc..62373c4 100644
--- a/include/llvm/IR/DebugInfoMetadata.h
+++ b/include/llvm/IR/DebugInfoMetadata.h
@@ -41,6 +41,101 @@
namespace llvm {
+/// \brief Pointer union between a subclass of DebugNode and MDString.
+///
+/// \a MDCompositeType can be referenced via an \a MDString unique identifier.
+/// This class allows some type safety in the face of that, requiring either a
+/// node of a particular type or an \a MDString.
+template <class T> class TypedDebugNodeRef {
+ const Metadata *MD = nullptr;
+
+public:
+ TypedDebugNodeRef() = default;
+ TypedDebugNodeRef(std::nullptr_t) {}
+
+ /// \brief Construct from a raw pointer.
+ explicit TypedDebugNodeRef(const Metadata *MD) : MD(MD) {
+ assert((!MD || isa<MDString>(MD) || isa<T>(MD)) && "Expected valid ref");
+ }
+
+ template <class U>
+ TypedDebugNodeRef(
+ const TypedDebugNodeRef<U> &X,
+ typename std::enable_if<std::is_convertible<U *, T *>::value>::type * =
+ nullptr)
+ : MD(X) {}
+
+ operator Metadata *() const { return const_cast<Metadata *>(MD); }
+
+ bool operator==(const TypedDebugNodeRef<T> &X) const { return MD == X.MD; };
+ bool operator!=(const TypedDebugNodeRef<T> &X) const { return MD != X.MD; };
+
+ /// \brief Create a reference.
+ ///
+ /// Get a reference to \c N, using an \a MDString reference if available.
+ static TypedDebugNodeRef get(const T *N);
+
+ template <class MapTy> T *resolve(const MapTy &Map) const {
+ if (!MD)
+ return nullptr;
+
+ if (auto *Typed = dyn_cast<T>(MD))
+ return const_cast<T *>(Typed);
+
+ auto *S = cast<MDString>(MD);
+ auto I = Map.find(S);
+ assert(I != Map.end() && "Missing identifier in type map");
+ return cast<T>(I->second);
+ }
+};
+
+typedef TypedDebugNodeRef<DebugNode> DebugNodeRef;
+typedef TypedDebugNodeRef<MDScope> MDScopeRef;
+typedef TypedDebugNodeRef<MDType> MDTypeRef;
+
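Editorial illustration, not part of the patch: resolve() above is the piece that turns a string-identified reference back into its node. A minimal sketch under the assumption that the identifier map is the usual DenseMap from MDString identifiers to defining nodes (the shape produced by generateDITypeIdentifierMap); resolveOrNull is a hypothetical helper:

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;

// Hypothetical helper: an MDTypeRef holds either an MDType node or an
// MDString identifier; resolve() returns the node directly in the first case
// and looks the identifier up in the map (asserting if absent) in the second.
static MDType *resolveOrNull(MDTypeRef Ref,
                             const DenseMap<const MDString *, MDNode *> &Map) {
  return Ref.resolve(Map); // nullptr for an empty reference
}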
+class MDTypeRefArray {
+ const MDTuple *N = nullptr;
+
+public:
+ MDTypeRefArray(const MDTuple *N) : N(N) {}
+
+ explicit operator bool() const { return get(); }
+ explicit operator MDTuple *() const { return get(); }
+
+ MDTuple *get() const { return const_cast<MDTuple *>(N); }
+ MDTuple *operator->() const { return get(); }
+ MDTuple &operator*() const { return *get(); }
+
+ // FIXME: Fix callers and remove condition on N.
+ unsigned size() const { return N ? N->getNumOperands() : 0u; }
+ MDTypeRef operator[](unsigned I) const { return MDTypeRef(N->getOperand(I)); }
+
+ class iterator : std::iterator<std::input_iterator_tag, MDTypeRef,
+ std::ptrdiff_t, void, MDTypeRef> {
+ MDNode::op_iterator I = nullptr;
+
+ public:
+ iterator() = default;
+ explicit iterator(MDNode::op_iterator I) : I(I) {}
+ MDTypeRef operator*() const { return MDTypeRef(*I); }
+ iterator &operator++() {
+ ++I;
+ return *this;
+ }
+ iterator operator++(int) {
+ iterator Temp(*this);
+ ++I;
+ return Temp;
+ }
+ bool operator==(const iterator &X) const { return I == X.I; }
+ bool operator!=(const iterator &X) const { return I != X.I; }
+ };
+
+ // FIXME: Fix callers and remove condition on N.
+ iterator begin() const { return N ? iterator(N->op_begin()) : iterator(); }
+ iterator end() const { return N ? iterator(N->op_end()) : iterator(); }
+};
+
/// \brief Tagged DWARF-like metadata node.
///
/// A metadata node with a DWARF tag (i.e., a constant named \c DW_TAG_*,
@@ -57,7 +152,7 @@ protected:
assert(Tag < 1u << 16);
SubclassData16 = Tag;
}
- ~DebugNode() {}
+ ~DebugNode() = default;
template <class Ty> Ty *getOperandAs(unsigned I) const {
return cast_or_null<Ty>(getOperand(I));
@@ -78,6 +173,28 @@ protected:
public:
unsigned getTag() const { return SubclassData16; }
+ /// \brief Debug info flags.
+ ///
+ /// The three accessibility flags are mutually exclusive and rolled together
+ /// in the first two bits.
+ enum DIFlags {
+#define HANDLE_DI_FLAG(ID, NAME) Flag##NAME = ID,
+#include "llvm/IR/DebugInfoFlags.def"
+ FlagAccessibility = FlagPrivate | FlagProtected | FlagPublic
+ };
+
+ static unsigned getFlag(StringRef Flag);
+ static const char *getFlagString(unsigned Flag);
+
+ /// \brief Split up a flags bitfield.
+ ///
+ /// Split \c Flags into \c SplitFlags, a vector of its components. Returns
+ /// any remaining (unrecognized) bits.
+ static unsigned splitFlags(unsigned Flags,
+ SmallVectorImpl<unsigned> &SplitFlags);
+
+ DebugNodeRef getRef() const { return DebugNodeRef::get(this); }
+
static bool classof(const Metadata *MD) {
switch (MD->getMetadataID()) {
default:
@@ -106,6 +223,18 @@ public:
}
};
+template <class T>
+struct simplify_type<const TypedDebugNodeRef<T>> {
+ typedef Metadata *SimpleType;
+ static SimpleType getSimplifiedValue(const TypedDebugNodeRef<T> &MD) {
+ return MD;
+ }
+};
+
+template <class T>
+struct simplify_type<TypedDebugNodeRef<T>>
+ : simplify_type<const TypedDebugNodeRef<T>> {};
+
/// \brief Generic tagged DWARF-like metadata node.
///
/// An un-specialized DWARF-like metadata node. The first operand is a
@@ -192,27 +321,30 @@ class MDSubrange : public DebugNode {
friend class MDNode;
int64_t Count;
- int64_t Lo;
+ int64_t LowerBound;
- MDSubrange(LLVMContext &C, StorageType Storage, int64_t Count, int64_t Lo)
+ MDSubrange(LLVMContext &C, StorageType Storage, int64_t Count,
+ int64_t LowerBound)
: DebugNode(C, MDSubrangeKind, Storage, dwarf::DW_TAG_subrange_type,
None),
- Count(Count), Lo(Lo) {}
- ~MDSubrange() {}
+ Count(Count), LowerBound(LowerBound) {}
+ ~MDSubrange() = default;
- static MDSubrange *getImpl(LLVMContext &Context, int64_t Count, int64_t Lo,
- StorageType Storage, bool ShouldCreate = true);
+ static MDSubrange *getImpl(LLVMContext &Context, int64_t Count,
+ int64_t LowerBound, StorageType Storage,
+ bool ShouldCreate = true);
TempMDSubrange cloneImpl() const {
- return getTemporary(getContext(), getCount(), getLo());
+ return getTemporary(getContext(), getCount(), getLowerBound());
}
public:
- DEFINE_MDNODE_GET(MDSubrange, (int64_t Count, int64_t Lo = 0), (Count, Lo))
+ DEFINE_MDNODE_GET(MDSubrange, (int64_t Count, int64_t LowerBound = 0),
+ (Count, LowerBound))
TempMDSubrange clone() const { return cloneImpl(); }
- int64_t getLo() const { return Lo; }
+ int64_t getLowerBound() const { return LowerBound; }
int64_t getCount() const { return Count; }
static bool classof(const Metadata *MD) {
@@ -234,7 +366,7 @@ class MDEnumerator : public DebugNode {
ArrayRef<Metadata *> Ops)
: DebugNode(C, MDEnumeratorKind, Storage, dwarf::DW_TAG_enumerator, Ops),
Value(Value) {}
- ~MDEnumerator() {}
+ ~MDEnumerator() = default;
static MDEnumerator *getImpl(LLVMContext &Context, int64_t Value,
StringRef Name, StorageType Storage,
@@ -279,20 +411,30 @@ protected:
MDScope(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag,
ArrayRef<Metadata *> Ops)
: DebugNode(C, ID, Storage, Tag, Ops) {}
- ~MDScope() {}
+ ~MDScope() = default;
public:
- /// \brief Return the underlying file.
+ MDFile *getFile() const { return cast_or_null<MDFile>(getRawFile()); }
+
+ inline StringRef getFilename() const;
+ inline StringRef getDirectory() const;
+
+ StringRef getName() const;
+ MDScopeRef getScope() const;
+
+ /// \brief Return the raw underlying file.
///
/// An \a MDFile is an \a MDScope, but it doesn't point at a separate file
 /// (it \em is the file). If \c this is an \a MDFile, we need to return \c
/// this. Otherwise, return the first operand, which is where all other
/// subclasses store their file pointer.
- Metadata *getFile() const {
+ Metadata *getRawFile() const {
return isa<MDFile>(this) ? const_cast<MDScope *>(this)
: static_cast<Metadata *>(getOperand(0));
}
+ MDScopeRef getRef() const { return MDScopeRef::get(this); }
+
static bool classof(const Metadata *MD) {
switch (MD->getMetadataID()) {
default:
@@ -322,7 +464,7 @@ class MDFile : public MDScope {
MDFile(LLVMContext &C, StorageType Storage, ArrayRef<Metadata *> Ops)
: MDScope(C, MDFileKind, Storage, dwarf::DW_TAG_file_type, Ops) {}
- ~MDFile() {}
+ ~MDFile() = default;
static MDFile *getImpl(LLVMContext &Context, StringRef Filename,
StringRef Directory, StorageType Storage,
@@ -358,6 +500,18 @@ public:
}
};
+StringRef MDScope::getFilename() const {
+ if (auto *F = getFile())
+ return F->getFilename();
+ return "";
+}
+
+StringRef MDScope::getDirectory() const {
+ if (auto *F = getFile())
+ return F->getDirectory();
+ return "";
+}
+
/// \brief Base class for types.
///
/// TODO: Remove the hardcoded name and context, since many types don't use
@@ -377,7 +531,7 @@ protected:
: MDScope(C, ID, Storage, Tag, Ops), Line(Line), Flags(Flags),
SizeInBits(SizeInBits), AlignInBits(AlignInBits),
OffsetInBits(OffsetInBits) {}
- ~MDType() {}
+ ~MDType() = default;
public:
TempMDType clone() const {
@@ -390,9 +544,11 @@ public:
uint64_t getOffsetInBits() const { return OffsetInBits; }
unsigned getFlags() const { return Flags; }
- Metadata *getScope() const { return getOperand(1); }
+ MDScopeRef getScope() const { return MDScopeRef(getRawScope()); }
StringRef getName() const { return getStringOperand(2); }
+
+ Metadata *getRawScope() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
void setFlags(unsigned NewFlags) {
@@ -400,6 +556,31 @@ public:
Flags = NewFlags;
}
+ bool isPrivate() const {
+ return (getFlags() & FlagAccessibility) == FlagPrivate;
+ }
+ bool isProtected() const {
+ return (getFlags() & FlagAccessibility) == FlagProtected;
+ }
+ bool isPublic() const {
+ return (getFlags() & FlagAccessibility) == FlagPublic;
+ }
+ bool isForwardDecl() const { return getFlags() & FlagFwdDecl; }
+ bool isAppleBlockExtension() const { return getFlags() & FlagAppleBlock; }
+ bool isBlockByrefStruct() const { return getFlags() & FlagBlockByrefStruct; }
+ bool isVirtual() const { return getFlags() & FlagVirtual; }
+ bool isArtificial() const { return getFlags() & FlagArtificial; }
+ bool isObjectPointer() const { return getFlags() & FlagObjectPointer; }
+ bool isObjcClassComplete() const {
+ return getFlags() & FlagObjcClassComplete;
+ }
+ bool isVector() const { return getFlags() & FlagVector; }
+ bool isStaticMember() const { return getFlags() & FlagStaticMember; }
+ bool isLValueReference() const { return getFlags() & FlagLValueReference; }
+ bool isRValueReference() const { return getFlags() & FlagRValueReference; }
+
+ MDTypeRef getRef() const { return MDTypeRef::get(this); }
+
static bool classof(const Metadata *MD) {
switch (MD->getMetadataID()) {
default:
@@ -413,7 +594,7 @@ public:
}
};
-/// \brief Basic type.
+/// \brief Basic type, like 'int' or 'float'.
///
/// TODO: Split out DW_TAG_unspecified_type.
/// TODO: Drop unused accessors.
@@ -429,7 +610,7 @@ class MDBasicType : public MDType {
: MDType(C, MDBasicTypeKind, Storage, Tag, 0, SizeInBits, AlignInBits, 0,
0, Ops),
Encoding(Encoding) {}
- ~MDBasicType() {}
+ ~MDBasicType() = default;
static MDBasicType *getImpl(LLVMContext &Context, unsigned Tag,
StringRef Name, uint64_t SizeInBits,
@@ -480,10 +661,11 @@ protected:
ArrayRef<Metadata *> Ops)
: MDType(C, ID, Storage, Tag, Line, SizeInBits, AlignInBits, OffsetInBits,
Flags, Ops) {}
- ~MDDerivedTypeBase() {}
+ ~MDDerivedTypeBase() = default;
public:
- Metadata *getBaseType() const { return getOperand(3); }
+ MDTypeRef getBaseType() const { return MDTypeRef(getRawBaseType()); }
+ Metadata *getRawBaseType() const { return getOperand(3); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == MDDerivedTypeKind ||
@@ -507,11 +689,11 @@ class MDDerivedType : public MDDerivedTypeBase {
uint64_t OffsetInBits, unsigned Flags, ArrayRef<Metadata *> Ops)
: MDDerivedTypeBase(C, MDDerivedTypeKind, Storage, Tag, Line, SizeInBits,
AlignInBits, OffsetInBits, Flags, Ops) {}
- ~MDDerivedType() {}
+ ~MDDerivedType() = default;
static MDDerivedType *getImpl(LLVMContext &Context, unsigned Tag,
- StringRef Name, Metadata *File, unsigned Line,
- Metadata *Scope, Metadata *BaseType,
+ StringRef Name, MDFile *File, unsigned Line,
+ MDScopeRef Scope, MDTypeRef BaseType,
uint64_t SizeInBits, uint64_t AlignInBits,
uint64_t OffsetInBits, unsigned Flags,
Metadata *ExtraData, StorageType Storage,
@@ -545,11 +727,10 @@ public:
(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, ExtraData))
DEFINE_MDNODE_GET(MDDerivedType,
- (unsigned Tag, StringRef Name, Metadata *File,
- unsigned Line, Metadata *Scope, Metadata *BaseType,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- Metadata *ExtraData = nullptr),
+ (unsigned Tag, StringRef Name, MDFile *File, unsigned Line,
+ MDScopeRef Scope, MDTypeRef BaseType, uint64_t SizeInBits,
+ uint64_t AlignInBits, uint64_t OffsetInBits,
+ unsigned Flags, Metadata *ExtraData = nullptr),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, ExtraData))
@@ -562,7 +743,25 @@ public:
///
/// TODO: Separate out types that need this extra operand: pointer-to-member
/// types and member fields (static members and ivars).
- Metadata *getExtraData() const { return getOperand(4); }
+ Metadata *getExtraData() const { return getRawExtraData(); }
+ Metadata *getRawExtraData() const { return getOperand(4); }
+
+ /// \brief Get casted version of extra data.
+ /// @{
+ MDTypeRef getClassType() const {
+ assert(getTag() == dwarf::DW_TAG_ptr_to_member_type);
+ return MDTypeRef(getExtraData());
+ }
+ MDObjCProperty *getObjCProperty() const {
+ return dyn_cast_or_null<MDObjCProperty>(getExtraData());
+ }
+ Constant *getConstant() const {
+ assert(getTag() == dwarf::DW_TAG_member && isStaticMember());
+ if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
+ return C->getValue();
+ return nullptr;
+ }
+ /// @}
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == MDDerivedTypeKind;
@@ -584,15 +783,28 @@ protected:
: MDDerivedTypeBase(C, ID, Storage, Tag, Line, SizeInBits, AlignInBits,
OffsetInBits, Flags, Ops),
RuntimeLang(RuntimeLang) {}
- ~MDCompositeTypeBase() {}
+ ~MDCompositeTypeBase() = default;
public:
- Metadata *getElements() const { return getOperand(4); }
- Metadata *getVTableHolder() const { return getOperand(5); }
- Metadata *getTemplateParams() const { return getOperand(6); }
+ /// \brief Get the elements of the composite type.
+ ///
+ /// \note Calling this is only valid for \a MDCompositeType. This assertion
+ /// can be removed once \a MDSubroutineType has been separated from
+ /// "composite types".
+ DebugNodeArray getElements() const {
+ assert(!isa<MDSubroutineType>(this) && "no elements for DISubroutineType");
+ return cast_or_null<MDTuple>(getRawElements());
+ }
+ MDTypeRef getVTableHolder() const { return MDTypeRef(getRawVTableHolder()); }
+ MDTemplateParameterArray getTemplateParams() const {
+ return cast_or_null<MDTuple>(getRawTemplateParams());
+ }
StringRef getIdentifier() const { return getStringOperand(7); }
unsigned getRuntimeLang() const { return RuntimeLang; }
+ Metadata *getRawElements() const { return getOperand(4); }
+ Metadata *getRawVTableHolder() const { return getOperand(5); }
+ Metadata *getRawTemplateParams() const { return getOperand(6); }
MDString *getRawIdentifier() const { return getOperandAs<MDString>(7); }
/// \brief Replace operands.
@@ -601,20 +813,19 @@ public:
/// this will be RAUW'ed and deleted. Use a \a TrackingMDRef to keep track
/// of its movement if necessary.
/// @{
- void replaceElements(MDTuple *Elements) {
+ void replaceElements(DebugNodeArray Elements) {
#ifndef NDEBUG
- if (auto *Old = cast_or_null<MDTuple>(getElements()))
- for (const auto &Op : Old->operands())
- assert(std::find(Elements->op_begin(), Elements->op_end(), Op) &&
- "Lost a member during member list replacement");
+ for (DebugNode *Op : getElements())
+ assert(std::find(Elements->op_begin(), Elements->op_end(), Op) &&
+ "Lost a member during member list replacement");
#endif
- replaceOperandWith(4, Elements);
+ replaceOperandWith(4, Elements.get());
}
- void replaceVTableHolder(Metadata *VTableHolder) {
+ void replaceVTableHolder(MDTypeRef VTableHolder) {
replaceOperandWith(5, VTableHolder);
}
- void replaceTemplateParams(MDTuple *TemplateParams) {
- replaceOperandWith(6, TemplateParams);
+ void replaceTemplateParams(MDTemplateParameterArray TemplateParams) {
+ replaceOperandWith(6, TemplateParams.get());
}
/// @}
@@ -639,20 +850,20 @@ class MDCompositeType : public MDCompositeTypeBase {
: MDCompositeTypeBase(C, MDCompositeTypeKind, Storage, Tag, Line,
RuntimeLang, SizeInBits, AlignInBits, OffsetInBits,
Flags, Ops) {}
- ~MDCompositeType() {}
+ ~MDCompositeType() = default;
static MDCompositeType *
getImpl(LLVMContext &Context, unsigned Tag, StringRef Name, Metadata *File,
- unsigned Line, Metadata *Scope, Metadata *BaseType,
+ unsigned Line, MDScopeRef Scope, MDTypeRef BaseType,
uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
- uint64_t Flags, Metadata *Elements, unsigned RuntimeLang,
- Metadata *VTableHolder, Metadata *TemplateParams,
+ uint64_t Flags, DebugNodeArray Elements, unsigned RuntimeLang,
+ MDTypeRef VTableHolder, MDTemplateParameterArray TemplateParams,
StringRef Identifier, StorageType Storage, bool ShouldCreate = true) {
- return getImpl(Context, Tag, getCanonicalMDString(Context, Name), File,
- Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits,
- Flags, Elements, RuntimeLang, VTableHolder, TemplateParams,
- getCanonicalMDString(Context, Identifier), Storage,
- ShouldCreate);
+ return getImpl(
+ Context, Tag, getCanonicalMDString(Context, Name), File, Line, Scope,
+ BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements.get(),
+ RuntimeLang, VTableHolder, TemplateParams.get(),
+ getCanonicalMDString(Context, Identifier), Storage, ShouldCreate);
}
static MDCompositeType *
getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
@@ -672,12 +883,12 @@ class MDCompositeType : public MDCompositeTypeBase {
public:
DEFINE_MDNODE_GET(MDCompositeType,
- (unsigned Tag, StringRef Name, Metadata *File,
- unsigned Line, Metadata *Scope, Metadata *BaseType,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags, Metadata *Elements,
- unsigned RuntimeLang, Metadata *VTableHolder,
- Metadata *TemplateParams = nullptr,
+ (unsigned Tag, StringRef Name, MDFile *File, unsigned Line,
+ MDScopeRef Scope, MDTypeRef BaseType, uint64_t SizeInBits,
+ uint64_t AlignInBits, uint64_t OffsetInBits,
+ unsigned Flags, DebugNodeArray Elements,
+ unsigned RuntimeLang, MDTypeRef VTableHolder,
+ MDTemplateParameterArray TemplateParams = nullptr,
StringRef Identifier = ""),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
@@ -701,6 +912,14 @@ public:
}
};
+template <class T> TypedDebugNodeRef<T> TypedDebugNodeRef<T>::get(const T *N) {
+ if (N)
+ if (auto *Composite = dyn_cast<MDCompositeType>(N))
+ if (auto *S = Composite->getRawIdentifier())
+ return TypedDebugNodeRef<T>(S);
+ return TypedDebugNodeRef<T>(N);
+}
+
/// \brief Type array for a subprogram.
///
/// TODO: Detach from CompositeType, and fold the array of types in directly
@@ -714,9 +933,15 @@ class MDSubroutineType : public MDCompositeTypeBase {
: MDCompositeTypeBase(C, MDSubroutineTypeKind, Storage,
dwarf::DW_TAG_subroutine_type, 0, 0, 0, 0, 0, Flags,
Ops) {}
- ~MDSubroutineType() {}
+ ~MDSubroutineType() = default;
static MDSubroutineType *getImpl(LLVMContext &Context, unsigned Flags,
+ MDTypeRefArray TypeArray,
+ StorageType Storage,
+ bool ShouldCreate = true) {
+ return getImpl(Context, Flags, TypeArray.get(), Storage, ShouldCreate);
+ }
+ static MDSubroutineType *getImpl(LLVMContext &Context, unsigned Flags,
Metadata *TypeArray, StorageType Storage,
bool ShouldCreate = true);
@@ -725,12 +950,18 @@ class MDSubroutineType : public MDCompositeTypeBase {
}
public:
+ DEFINE_MDNODE_GET(MDSubroutineType,
+ (unsigned Flags, MDTypeRefArray TypeArray),
+ (Flags, TypeArray))
DEFINE_MDNODE_GET(MDSubroutineType, (unsigned Flags, Metadata *TypeArray),
(Flags, TypeArray))
TempMDSubroutineType clone() const { return cloneImpl(); }
- Metadata *getTypeArray() const { return getElements(); }
+ MDTypeRefArray getTypeArray() const {
+ return cast_or_null<MDTuple>(getRawTypeArray());
+ }
+ Metadata *getRawTypeArray() const { return getRawElements(); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == MDSubroutineTypeKind;
@@ -753,22 +984,23 @@ class MDCompileUnit : public MDScope {
: MDScope(C, MDCompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops),
SourceLanguage(SourceLanguage), IsOptimized(IsOptimized),
RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind) {}
- ~MDCompileUnit() {}
+ ~MDCompileUnit() = default;
static MDCompileUnit *
- getImpl(LLVMContext &Context, unsigned SourceLanguage, Metadata *File,
+ getImpl(LLVMContext &Context, unsigned SourceLanguage, MDFile *File,
StringRef Producer, bool IsOptimized, StringRef Flags,
unsigned RuntimeVersion, StringRef SplitDebugFilename,
- unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
- Metadata *Subprograms, Metadata *GlobalVariables,
- Metadata *ImportedEntities, StorageType Storage,
+ unsigned EmissionKind, MDCompositeTypeArray EnumTypes,
+ MDTypeArray RetainedTypes, MDSubprogramArray Subprograms,
+ MDGlobalVariableArray GlobalVariables,
+ MDImportedEntityArray ImportedEntities, StorageType Storage,
bool ShouldCreate = true) {
- return getImpl(Context, SourceLanguage, File,
- getCanonicalMDString(Context, Producer), IsOptimized,
- getCanonicalMDString(Context, Flags), RuntimeVersion,
- getCanonicalMDString(Context, SplitDebugFilename),
- EmissionKind, EnumTypes, RetainedTypes, Subprograms,
- GlobalVariables, ImportedEntities, Storage, ShouldCreate);
+ return getImpl(
+ Context, SourceLanguage, File, getCanonicalMDString(Context, Producer),
+ IsOptimized, getCanonicalMDString(Context, Flags), RuntimeVersion,
+ getCanonicalMDString(Context, SplitDebugFilename), EmissionKind,
+ EnumTypes.get(), RetainedTypes.get(), Subprograms.get(),
+ GlobalVariables.get(), ImportedEntities.get(), Storage, ShouldCreate);
}
static MDCompileUnit *
getImpl(LLVMContext &Context, unsigned SourceLanguage, Metadata *File,
@@ -789,12 +1021,13 @@ class MDCompileUnit : public MDScope {
public:
DEFINE_MDNODE_GET(MDCompileUnit,
- (unsigned SourceLanguage, Metadata *File,
- StringRef Producer, bool IsOptimized, StringRef Flags,
- unsigned RuntimeVersion, StringRef SplitDebugFilename,
- unsigned EmissionKind, Metadata *EnumTypes,
- Metadata *RetainedTypes, Metadata *Subprograms,
- Metadata *GlobalVariables, Metadata *ImportedEntities),
+ (unsigned SourceLanguage, MDFile *File, StringRef Producer,
+ bool IsOptimized, StringRef Flags, unsigned RuntimeVersion,
+ StringRef SplitDebugFilename, unsigned EmissionKind,
+ MDCompositeTypeArray EnumTypes, MDTypeArray RetainedTypes,
+ MDSubprogramArray Subprograms,
+ MDGlobalVariableArray GlobalVariables,
+ MDImportedEntityArray ImportedEntities),
(SourceLanguage, File, Producer, IsOptimized, Flags,
RuntimeVersion, SplitDebugFilename, EmissionKind,
EnumTypes, RetainedTypes, Subprograms, GlobalVariables,
@@ -820,17 +1053,32 @@ public:
StringRef getProducer() const { return getStringOperand(1); }
StringRef getFlags() const { return getStringOperand(2); }
StringRef getSplitDebugFilename() const { return getStringOperand(3); }
- Metadata *getEnumTypes() const { return getOperand(4); }
- Metadata *getRetainedTypes() const { return getOperand(5); }
- Metadata *getSubprograms() const { return getOperand(6); }
- Metadata *getGlobalVariables() const { return getOperand(7); }
- Metadata *getImportedEntities() const { return getOperand(8); }
+ MDCompositeTypeArray getEnumTypes() const {
+ return cast_or_null<MDTuple>(getRawEnumTypes());
+ }
+ MDTypeArray getRetainedTypes() const {
+ return cast_or_null<MDTuple>(getRawRetainedTypes());
+ }
+ MDSubprogramArray getSubprograms() const {
+ return cast_or_null<MDTuple>(getRawSubprograms());
+ }
+ MDGlobalVariableArray getGlobalVariables() const {
+ return cast_or_null<MDTuple>(getRawGlobalVariables());
+ }
+ MDImportedEntityArray getImportedEntities() const {
+ return cast_or_null<MDTuple>(getRawImportedEntities());
+ }
MDString *getRawProducer() const { return getOperandAs<MDString>(1); }
MDString *getRawFlags() const { return getOperandAs<MDString>(2); }
MDString *getRawSplitDebugFilename() const {
return getOperandAs<MDString>(3);
}
+ Metadata *getRawEnumTypes() const { return getOperand(4); }
+ Metadata *getRawRetainedTypes() const { return getOperand(5); }
+ Metadata *getRawSubprograms() const { return getOperand(6); }
+ Metadata *getRawGlobalVariables() const { return getOperand(7); }
+ Metadata *getRawImportedEntities() const { return getOperand(8); }
/// \brief Replace arrays.
///
@@ -838,8 +1086,12 @@ public:
/// deleted on a uniquing collision. In practice, uniquing collisions on \a
/// MDCompileUnit should be fairly rare.
/// @{
- void replaceSubprograms(MDTuple *N) { replaceOperandWith(6, N); }
- void replaceGlobalVariables(MDTuple *N) { replaceOperandWith(7, N); }
+ void replaceSubprograms(MDSubprogramArray N) {
+ replaceOperandWith(6, N.get());
+ }
+ void replaceGlobalVariables(MDGlobalVariableArray N) {
+ replaceOperandWith(7, N.get());
+ }
/// @}
static bool classof(const Metadata *MD) {
@@ -857,9 +1109,15 @@ protected:
MDLocalScope(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag,
ArrayRef<Metadata *> Ops)
: MDScope(C, ID, Storage, Tag, Ops) {}
- ~MDLocalScope() {}
+ ~MDLocalScope() = default;
public:
+ /// \brief Get the subprogram for this scope.
+ ///
+ /// Return this if it's an \a MDSubprogram; otherwise, look up the scope
+ /// chain.
+ MDSubprogram *getSubprogram() const;
+
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == MDSubprogramKind ||
MD->getMetadataID() == MDLexicalBlockKind ||
@@ -914,12 +1172,58 @@ public:
unsigned getLine() const { return SubclassData32; }
unsigned getColumn() const { return SubclassData16; }
MDLocalScope *getScope() const {
- return cast_or_null<MDLocalScope>(getRawScope());
+ return cast<MDLocalScope>(getRawScope());
}
MDLocation *getInlinedAt() const {
return cast_or_null<MDLocation>(getRawInlinedAt());
}
+ MDFile *getFile() const { return getScope()->getFile(); }
+ StringRef getFilename() const { return getScope()->getFilename(); }
+ StringRef getDirectory() const { return getScope()->getDirectory(); }
+
+ /// \brief Get the scope where this is inlined.
+ ///
+ /// Walk through \a getInlinedAt() and return \a getScope() from the deepest
+ /// location.
+ MDLocalScope *getInlinedAtScope() const {
+ if (auto *IA = getInlinedAt())
+ return IA->getInlinedAtScope();
+ return getScope();
+ }
+
+ /// \brief Check whether this can be discriminated from another location.
+ ///
+ /// Check \c this can be discriminated from \c RHS in a linetable entry.
+ /// Scope and inlined-at chains are not recorded in the linetable, so they
+ /// cannot be used to distinguish basic blocks.
+ ///
+ /// The current implementation is weaker than it should be, since it just
+ /// checks filename and line.
+ ///
+ /// FIXME: Add a check for getDiscriminator().
+ /// FIXME: Add a check for getColumn().
+ /// FIXME: Change the getFilename() check to getFile() (or add one for
+ /// getDirectory()).
+ bool canDiscriminate(const MDLocation &RHS) const {
+ return getFilename() != RHS.getFilename() || getLine() != RHS.getLine();
+ }
+
+ /// \brief Get the DWARF discriminator.
+ ///
+ /// DWARF discriminators distinguish identical file locations between
+ /// instructions that are on different basic blocks.
+ inline unsigned getDiscriminator() const;
+
+ /// \brief Compute new discriminator in the given context.
+ ///
+ /// This modifies the \a LLVMContext that \c this is in to increment the next
+ /// discriminator for \c this's line/filename combination.
+ ///
+ /// FIXME: Delete this. See comments in implementation and at the only call
+ /// site in \a AddDiscriminators::runOnFunction().
+ unsigned computeNewDiscriminator() const;
+
Metadata *getRawScope() const { return getOperand(0); }
Metadata *getRawInlinedAt() const {
if (getNumOperands() == 2)
@@ -958,21 +1262,23 @@ class MDSubprogram : public MDLocalScope {
Line(Line), ScopeLine(ScopeLine), Virtuality(Virtuality),
VirtualIndex(VirtualIndex), Flags(Flags), IsLocalToUnit(IsLocalToUnit),
IsDefinition(IsDefinition), IsOptimized(IsOptimized) {}
- ~MDSubprogram() {}
+ ~MDSubprogram() = default;
static MDSubprogram *
- getImpl(LLVMContext &Context, Metadata *Scope, StringRef Name,
- StringRef LinkageName, Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
- Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
- unsigned Flags, bool IsOptimized, Metadata *Function,
- Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables,
+ getImpl(LLVMContext &Context, MDScopeRef Scope, StringRef Name,
+ StringRef LinkageName, MDFile *File, unsigned Line,
+ MDSubroutineType *Type, bool IsLocalToUnit, bool IsDefinition,
+ unsigned ScopeLine, MDTypeRef ContainingType, unsigned Virtuality,
+ unsigned VirtualIndex, unsigned Flags, bool IsOptimized,
+ Constant *Function, MDTemplateParameterArray TemplateParams,
+ MDSubprogram *Declaration, MDLocalVariableArray Variables,
StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, LinkageName), File, Line, Type,
IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
- Virtuality, VirtualIndex, Flags, IsOptimized, Function,
- TemplateParams, Declaration, Variables, Storage,
+ Virtuality, VirtualIndex, Flags, IsOptimized,
+ Function ? ConstantAsMetadata::get(Function) : nullptr,
+ TemplateParams.get(), Declaration, Variables.get(), Storage,
ShouldCreate);
}
static MDSubprogram *
@@ -989,22 +1295,25 @@ class MDSubprogram : public MDLocalScope {
getFile(), getLine(), getType(), isLocalToUnit(),
isDefinition(), getScopeLine(), getContainingType(),
getVirtuality(), getVirtualIndex(), getFlags(),
- isOptimized(), getFunction(), getTemplateParams(),
- getDeclaration(), getVariables());
+ isOptimized(), getFunctionConstant(),
+ getTemplateParams(), getDeclaration(), getVariables());
}
public:
- DEFINE_MDNODE_GET(
- MDSubprogram,
- (Metadata * Scope, StringRef Name, StringRef LinkageName, Metadata *File,
- unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
- unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality,
- unsigned VirtualIndex, unsigned Flags, bool IsOptimized,
- Metadata *Function = nullptr, Metadata *TemplateParams = nullptr,
- Metadata *Declaration = nullptr, Metadata *Variables = nullptr),
- (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
- ScopeLine, ContainingType, Virtuality, VirtualIndex, Flags, IsOptimized,
- Function, TemplateParams, Declaration, Variables))
+ DEFINE_MDNODE_GET(MDSubprogram,
+ (MDScopeRef Scope, StringRef Name, StringRef LinkageName,
+ MDFile *File, unsigned Line, MDSubroutineType *Type,
+ bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
+ MDTypeRef ContainingType, unsigned Virtuality,
+ unsigned VirtualIndex, unsigned Flags, bool IsOptimized,
+ Constant *Function = nullptr,
+ MDTemplateParameterArray TemplateParams = nullptr,
+ MDSubprogram *Declaration = nullptr,
+ MDLocalVariableArray Variables = nullptr),
+ (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
+ IsDefinition, ScopeLine, ContainingType, Virtuality,
+ VirtualIndex, Flags, IsOptimized, Function, TemplateParams,
+ Declaration, Variables))
DEFINE_MDNODE_GET(
MDSubprogram,
(Metadata * Scope, MDString *Name, MDString *LinkageName, Metadata *File,
@@ -1029,7 +1338,36 @@ public:
bool isDefinition() const { return IsDefinition; }
bool isOptimized() const { return IsOptimized; }
- Metadata *getScope() const { return getOperand(1); }
+ unsigned isArtificial() const { return getFlags() & FlagArtificial; }
+ bool isPrivate() const {
+ return (getFlags() & FlagAccessibility) == FlagPrivate;
+ }
+ bool isProtected() const {
+ return (getFlags() & FlagAccessibility) == FlagProtected;
+ }
+ bool isPublic() const {
+ return (getFlags() & FlagAccessibility) == FlagPublic;
+ }
+ bool isExplicit() const { return getFlags() & FlagExplicit; }
+ bool isPrototyped() const { return getFlags() & FlagPrototyped; }
+
+ /// \brief Check if this is reference-qualified.
+ ///
+ /// Return true if this subprogram is a C++11 reference-qualified non-static
+ /// member function (void foo() &).
+ unsigned isLValueReference() const {
+ return getFlags() & FlagLValueReference;
+ }
+
+ /// \brief Check if this is rvalue-reference-qualified.
+ ///
+ /// Return true if this subprogram is a C++11 rvalue-reference-qualified
+ /// non-static member function (void foo() &&).
+ unsigned isRValueReference() const {
+ return getFlags() & FlagRValueReference;
+ }
+
+ MDScopeRef getScope() const { return MDScopeRef(getRawScope()); }
StringRef getName() const { return getStringOperand(2); }
StringRef getDisplayName() const { return getStringOperand(3); }
@@ -1038,13 +1376,42 @@ public:
MDString *getRawName() const { return getOperandAs<MDString>(2); }
MDString *getRawLinkageName() const { return getOperandAs<MDString>(4); }
- Metadata *getType() const { return getOperand(5); }
- Metadata *getContainingType() const { return getOperand(6); }
+ MDSubroutineType *getType() const {
+ return cast_or_null<MDSubroutineType>(getRawType());
+ }
+ MDTypeRef getContainingType() const {
+ return MDTypeRef(getRawContainingType());
+ }
- Metadata *getFunction() const { return getOperand(7); }
- Metadata *getTemplateParams() const { return getOperand(8); }
- Metadata *getDeclaration() const { return getOperand(9); }
- Metadata *getVariables() const { return getOperand(10); }
+ Constant *getFunctionConstant() const {
+ if (auto *C = cast_or_null<ConstantAsMetadata>(getRawFunction()))
+ return C->getValue();
+ return nullptr;
+ }
+ MDTemplateParameterArray getTemplateParams() const {
+ return cast_or_null<MDTuple>(getRawTemplateParams());
+ }
+ MDSubprogram *getDeclaration() const {
+ return cast_or_null<MDSubprogram>(getRawDeclaration());
+ }
+ MDLocalVariableArray getVariables() const {
+ return cast_or_null<MDTuple>(getRawVariables());
+ }
+
+ Metadata *getRawScope() const { return getOperand(1); }
+ Metadata *getRawType() const { return getOperand(5); }
+ Metadata *getRawContainingType() const { return getOperand(6); }
+ Metadata *getRawFunction() const { return getOperand(7); }
+ Metadata *getRawTemplateParams() const { return getOperand(8); }
+ Metadata *getRawDeclaration() const { return getOperand(9); }
+ Metadata *getRawVariables() const { return getOperand(10); }
+
+ /// \brief Get a pointer to the function this subprogram describes.
+ ///
+ /// This dyn_casts \a getFunctionConstant() to \a Function.
+ ///
+ /// FIXME: Should this be looking through bitcasts?
+ Function *getFunction() const;
/// \brief Replace the function.
///
@@ -1057,6 +1424,11 @@ public:
void replaceFunction(std::nullptr_t) { replaceOperandWith(7, nullptr); }
/// @}
+ /// \brief Check if this subprogram describes the given function.
+ ///
+ /// FIXME: Should this be looking through bitcasts?
+ bool describes(const Function *F) const;
+
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == MDSubprogramKind;
}
@@ -1067,11 +1439,20 @@ protected:
MDLexicalBlockBase(LLVMContext &C, unsigned ID, StorageType Storage,
ArrayRef<Metadata *> Ops)
: MDLocalScope(C, ID, Storage, dwarf::DW_TAG_lexical_block, Ops) {}
- ~MDLexicalBlockBase() {}
+ ~MDLexicalBlockBase() = default;
public:
- Metadata *getScope() const { return getOperand(1); }
+ MDLocalScope *getScope() const { return cast<MDLocalScope>(getRawScope()); }
+ Metadata *getRawScope() const { return getOperand(1); }
+
+ /// \brief Forwarding accessors to LexicalBlock.
+ ///
+ /// TODO: Remove these and update code to use \a MDLexicalBlock directly.
+ /// @{
+ inline unsigned getLine() const;
+ inline unsigned getColumn() const;
+ /// @}
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == MDLexicalBlockKind ||
MD->getMetadataID() == MDLexicalBlockFileKind;
@@ -1089,7 +1470,16 @@ class MDLexicalBlock : public MDLexicalBlockBase {
unsigned Column, ArrayRef<Metadata *> Ops)
: MDLexicalBlockBase(C, MDLexicalBlockKind, Storage, Ops), Line(Line),
Column(Column) {}
- ~MDLexicalBlock() {}
+ ~MDLexicalBlock() = default;
+
+ static MDLexicalBlock *getImpl(LLVMContext &Context, MDLocalScope *Scope,
+ MDFile *File, unsigned Line, unsigned Column,
+ StorageType Storage,
+ bool ShouldCreate = true) {
+ return getImpl(Context, static_cast<Metadata *>(Scope),
+ static_cast<Metadata *>(File), Line, Column, Storage,
+ ShouldCreate);
+ }
static MDLexicalBlock *getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *File, unsigned Line, unsigned Column,
@@ -1101,6 +1491,9 @@ class MDLexicalBlock : public MDLexicalBlockBase {
}
public:
+ DEFINE_MDNODE_GET(MDLexicalBlock, (MDLocalScope * Scope, MDFile *File,
+ unsigned Line, unsigned Column),
+ (Scope, File, Line, Column))
DEFINE_MDNODE_GET(MDLexicalBlock, (Metadata * Scope, Metadata *File,
unsigned Line, unsigned Column),
(Scope, File, Line, Column))
@@ -1115,6 +1508,18 @@ public:
}
};
+unsigned MDLexicalBlockBase::getLine() const {
+ if (auto *N = dyn_cast<MDLexicalBlock>(this))
+ return N->getLine();
+ return 0;
+}
+
+unsigned MDLexicalBlockBase::getColumn() const {
+ if (auto *N = dyn_cast<MDLexicalBlock>(this))
+ return N->getColumn();
+ return 0;
+}
+
class MDLexicalBlockFile : public MDLexicalBlockBase {
friend class LLVMContextImpl;
friend class MDNode;
@@ -1125,7 +1530,16 @@ class MDLexicalBlockFile : public MDLexicalBlockBase {
unsigned Discriminator, ArrayRef<Metadata *> Ops)
: MDLexicalBlockBase(C, MDLexicalBlockFileKind, Storage, Ops),
Discriminator(Discriminator) {}
- ~MDLexicalBlockFile() {}
+ ~MDLexicalBlockFile() = default;
+
+ static MDLexicalBlockFile *getImpl(LLVMContext &Context, MDLocalScope *Scope,
+ MDFile *File, unsigned Discriminator,
+ StorageType Storage,
+ bool ShouldCreate = true) {
+ return getImpl(Context, static_cast<Metadata *>(Scope),
+ static_cast<Metadata *>(File), Discriminator, Storage,
+ ShouldCreate);
+ }
static MDLexicalBlockFile *getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *File, unsigned Discriminator,
@@ -1138,12 +1552,19 @@ class MDLexicalBlockFile : public MDLexicalBlockBase {
}
public:
+ DEFINE_MDNODE_GET(MDLexicalBlockFile, (MDLocalScope * Scope, MDFile *File,
+ unsigned Discriminator),
+ (Scope, File, Discriminator))
DEFINE_MDNODE_GET(MDLexicalBlockFile,
(Metadata * Scope, Metadata *File, unsigned Discriminator),
(Scope, File, Discriminator))
TempMDLexicalBlockFile clone() const { return cloneImpl(); }
+ // TODO: Remove these once they're gone from MDLexicalBlockBase.
+ unsigned getLine() const = delete;
+ unsigned getColumn() const = delete;
+
unsigned getDiscriminator() const { return Discriminator; }
static bool classof(const Metadata *MD) {
@@ -1151,6 +1572,12 @@ public:
}
};
+unsigned MDLocation::getDiscriminator() const {
+ if (auto *F = dyn_cast<MDLexicalBlockFile>(getScope()))
+ return F->getDiscriminator();
+ return 0;
+}
+
class MDNamespace : public MDScope {
friend class LLVMContextImpl;
friend class MDNode;
@@ -1162,10 +1589,10 @@ class MDNamespace : public MDScope {
: MDScope(Context, MDNamespaceKind, Storage, dwarf::DW_TAG_namespace,
Ops),
Line(Line) {}
- ~MDNamespace() {}
+ ~MDNamespace() = default;
- static MDNamespace *getImpl(LLVMContext &Context, Metadata *Scope,
- Metadata *File, StringRef Name, unsigned Line,
+ static MDNamespace *getImpl(LLVMContext &Context, MDScope *Scope,
+ MDFile *File, StringRef Name, unsigned Line,
StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Scope, File, getCanonicalMDString(Context, Name),
Line, Storage, ShouldCreate);
@@ -1180,8 +1607,8 @@ class MDNamespace : public MDScope {
}
public:
- DEFINE_MDNODE_GET(MDNamespace, (Metadata * Scope, Metadata *File,
- StringRef Name, unsigned Line),
+ DEFINE_MDNODE_GET(MDNamespace, (MDScope * Scope, MDFile *File, StringRef Name,
+ unsigned Line),
(Scope, File, Name, Line))
DEFINE_MDNODE_GET(MDNamespace, (Metadata * Scope, Metadata *File,
MDString *Name, unsigned Line),
@@ -1190,9 +1617,10 @@ public:
TempMDNamespace clone() const { return cloneImpl(); }
unsigned getLine() const { return Line; }
- Metadata *getScope() const { return getOperand(1); }
+ MDScope *getScope() const { return cast_or_null<MDScope>(getRawScope()); }
StringRef getName() const { return getStringOperand(2); }
+ Metadata *getRawScope() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
static bool classof(const Metadata *MD) {
@@ -1206,13 +1634,14 @@ protected:
MDTemplateParameter(LLVMContext &Context, unsigned ID, StorageType Storage,
unsigned Tag, ArrayRef<Metadata *> Ops)
: DebugNode(Context, ID, Storage, Tag, Ops) {}
- ~MDTemplateParameter() {}
+ ~MDTemplateParameter() = default;
public:
StringRef getName() const { return getStringOperand(0); }
- Metadata *getType() const { return getOperand(1); }
+ MDTypeRef getType() const { return MDTypeRef(getRawType()); }
MDString *getRawName() const { return getOperandAs<MDString>(0); }
+ Metadata *getRawType() const { return getOperand(1); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == MDTemplateTypeParameterKind ||
@@ -1228,10 +1657,10 @@ class MDTemplateTypeParameter : public MDTemplateParameter {
ArrayRef<Metadata *> Ops)
: MDTemplateParameter(Context, MDTemplateTypeParameterKind, Storage,
dwarf::DW_TAG_template_type_parameter, Ops) {}
- ~MDTemplateTypeParameter() {}
+ ~MDTemplateTypeParameter() = default;
static MDTemplateTypeParameter *getImpl(LLVMContext &Context, StringRef Name,
- Metadata *Type, StorageType Storage,
+ MDTypeRef Type, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, getCanonicalMDString(Context, Name), Type, Storage,
ShouldCreate);
@@ -1245,7 +1674,7 @@ class MDTemplateTypeParameter : public MDTemplateParameter {
}
public:
- DEFINE_MDNODE_GET(MDTemplateTypeParameter, (StringRef Name, Metadata *Type),
+ DEFINE_MDNODE_GET(MDTemplateTypeParameter, (StringRef Name, MDTypeRef Type),
(Name, Type))
DEFINE_MDNODE_GET(MDTemplateTypeParameter, (MDString * Name, Metadata *Type),
(Name, Type))
@@ -1265,10 +1694,10 @@ class MDTemplateValueParameter : public MDTemplateParameter {
unsigned Tag, ArrayRef<Metadata *> Ops)
: MDTemplateParameter(Context, MDTemplateValueParameterKind, Storage, Tag,
Ops) {}
- ~MDTemplateValueParameter() {}
+ ~MDTemplateValueParameter() = default;
static MDTemplateValueParameter *getImpl(LLVMContext &Context, unsigned Tag,
- StringRef Name, Metadata *Type,
+ StringRef Name, MDTypeRef Type,
Metadata *Value, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Tag, getCanonicalMDString(Context, Name), Type,
@@ -1286,7 +1715,7 @@ class MDTemplateValueParameter : public MDTemplateParameter {
public:
DEFINE_MDNODE_GET(MDTemplateValueParameter, (unsigned Tag, StringRef Name,
- Metadata *Type, Metadata *Value),
+ MDTypeRef Type, Metadata *Value),
(Tag, Name, Type, Value))
DEFINE_MDNODE_GET(MDTemplateValueParameter, (unsigned Tag, MDString *Name,
Metadata *Type, Metadata *Value),
@@ -1311,16 +1740,30 @@ protected:
MDVariable(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag,
unsigned Line, ArrayRef<Metadata *> Ops)
: DebugNode(C, ID, Storage, Tag, Ops), Line(Line) {}
- ~MDVariable() {}
+ ~MDVariable() = default;
public:
unsigned getLine() const { return Line; }
- Metadata *getScope() const { return getOperand(0); }
+ MDScope *getScope() const { return cast_or_null<MDScope>(getRawScope()); }
StringRef getName() const { return getStringOperand(1); }
- Metadata *getFile() const { return getOperand(2); }
- Metadata *getType() const { return getOperand(3); }
+ MDFile *getFile() const { return cast_or_null<MDFile>(getRawFile()); }
+ MDTypeRef getType() const { return MDTypeRef(getRawType()); }
+
+ StringRef getFilename() const {
+ if (auto *F = getFile())
+ return F->getFilename();
+ return "";
+ }
+ StringRef getDirectory() const {
+ if (auto *F = getFile())
+ return F->getDirectory();
+ return "";
+ }
+ Metadata *getRawScope() const { return getOperand(0); }
MDString *getRawName() const { return getOperandAs<MDString>(1); }
+ Metadata *getRawFile() const { return getOperand(2); }
+ Metadata *getRawType() const { return getOperand(3); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == MDLocalVariableKind ||
@@ -1344,17 +1787,18 @@ class MDGlobalVariable : public MDVariable {
: MDVariable(C, MDGlobalVariableKind, Storage, dwarf::DW_TAG_variable,
Line, Ops),
IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition) {}
- ~MDGlobalVariable() {}
+ ~MDGlobalVariable() = default;
static MDGlobalVariable *
- getImpl(LLVMContext &Context, Metadata *Scope, StringRef Name,
- StringRef LinkageName, Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition, Metadata *Variable,
- Metadata *StaticDataMemberDeclaration, StorageType Storage,
+ getImpl(LLVMContext &Context, MDScope *Scope, StringRef Name,
+ StringRef LinkageName, MDFile *File, unsigned Line, MDTypeRef Type,
+ bool IsLocalToUnit, bool IsDefinition, Constant *Variable,
+ MDDerivedType *StaticDataMemberDeclaration, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, LinkageName), File, Line, Type,
- IsLocalToUnit, IsDefinition, Variable,
+ IsLocalToUnit, IsDefinition,
+ Variable ? ConstantAsMetadata::get(Variable) : nullptr,
StaticDataMemberDeclaration, Storage, ShouldCreate);
}
static MDGlobalVariable *
@@ -1373,10 +1817,10 @@ class MDGlobalVariable : public MDVariable {
public:
DEFINE_MDNODE_GET(MDGlobalVariable,
- (Metadata * Scope, StringRef Name, StringRef LinkageName,
- Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition, Metadata *Variable,
- Metadata *StaticDataMemberDeclaration),
+ (MDScope * Scope, StringRef Name, StringRef LinkageName,
+ MDFile *File, unsigned Line, MDTypeRef Type,
+ bool IsLocalToUnit, bool IsDefinition, Constant *Variable,
+ MDDerivedType *StaticDataMemberDeclaration),
(Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
IsDefinition, Variable, StaticDataMemberDeclaration))
DEFINE_MDNODE_GET(MDGlobalVariable,
@@ -1393,10 +1837,18 @@ public:
bool isDefinition() const { return IsDefinition; }
StringRef getDisplayName() const { return getStringOperand(4); }
StringRef getLinkageName() const { return getStringOperand(5); }
- Metadata *getVariable() const { return getOperand(6); }
- Metadata *getStaticDataMemberDeclaration() const { return getOperand(7); }
+ Constant *getVariable() const {
+ if (auto *C = cast_or_null<ConstantAsMetadata>(getRawVariable()))
+ return dyn_cast<Constant>(C->getValue());
+ return nullptr;
+ }
+ MDDerivedType *getStaticDataMemberDeclaration() const {
+ return cast_or_null<MDDerivedType>(getRawStaticDataMemberDeclaration());
+ }
MDString *getRawLinkageName() const { return getOperandAs<MDString>(5); }
+ Metadata *getRawVariable() const { return getOperand(6); }
+ Metadata *getRawStaticDataMemberDeclaration() const { return getOperand(7); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == MDGlobalVariableKind;
@@ -1420,62 +1872,61 @@ class MDLocalVariable : public MDVariable {
ArrayRef<Metadata *> Ops)
: MDVariable(C, MDLocalVariableKind, Storage, Tag, Line, Ops), Arg(Arg),
Flags(Flags) {}
- ~MDLocalVariable() {}
+ ~MDLocalVariable() = default;
static MDLocalVariable *getImpl(LLVMContext &Context, unsigned Tag,
- Metadata *Scope, StringRef Name,
- Metadata *File, unsigned Line, Metadata *Type,
- unsigned Arg, unsigned Flags,
- Metadata *InlinedAt, StorageType Storage,
+ MDScope *Scope, StringRef Name, MDFile *File,
+ unsigned Line, MDTypeRef Type, unsigned Arg,
+ unsigned Flags, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Tag, Scope, getCanonicalMDString(Context, Name),
- File, Line, Type, Arg, Flags, InlinedAt, Storage,
- ShouldCreate);
+ File, Line, Type, Arg, Flags, Storage, ShouldCreate);
}
- static MDLocalVariable *getImpl(LLVMContext &Context, unsigned Tag,
- Metadata *Scope, MDString *Name,
- Metadata *File, unsigned Line, Metadata *Type,
- unsigned Arg, unsigned Flags,
- Metadata *InlinedAt, StorageType Storage,
- bool ShouldCreate = true);
+ static MDLocalVariable *
+ getImpl(LLVMContext &Context, unsigned Tag, Metadata *Scope, MDString *Name,
+ Metadata *File, unsigned Line, Metadata *Type, unsigned Arg,
+ unsigned Flags, StorageType Storage, bool ShouldCreate = true);
TempMDLocalVariable cloneImpl() const {
return getTemporary(getContext(), getTag(), getScope(), getName(),
- getFile(), getLine(), getType(), getArg(), getFlags(),
- getInlinedAt());
+ getFile(), getLine(), getType(), getArg(), getFlags());
}
public:
DEFINE_MDNODE_GET(MDLocalVariable,
- (unsigned Tag, Metadata *Scope, StringRef Name,
- Metadata *File, unsigned Line, Metadata *Type,
- unsigned Arg, unsigned Flags,
- Metadata *InlinedAt = nullptr),
- (Tag, Scope, Name, File, Line, Type, Arg, Flags, InlinedAt))
+ (unsigned Tag, MDLocalScope *Scope, StringRef Name,
+ MDFile *File, unsigned Line, MDTypeRef Type, unsigned Arg,
+ unsigned Flags),
+ (Tag, Scope, Name, File, Line, Type, Arg, Flags))
DEFINE_MDNODE_GET(MDLocalVariable,
(unsigned Tag, Metadata *Scope, MDString *Name,
Metadata *File, unsigned Line, Metadata *Type,
- unsigned Arg, unsigned Flags,
- Metadata *InlinedAt = nullptr),
- (Tag, Scope, Name, File, Line, Type, Arg, Flags, InlinedAt))
+ unsigned Arg, unsigned Flags),
+ (Tag, Scope, Name, File, Line, Type, Arg, Flags))
TempMDLocalVariable clone() const { return cloneImpl(); }
+ /// \brief Get the local scope for this variable.
+ ///
+ /// Variables must be defined in a local scope.
+ MDLocalScope *getScope() const {
+ return cast<MDLocalScope>(MDVariable::getScope());
+ }
+
unsigned getArg() const { return Arg; }
unsigned getFlags() const { return Flags; }
- Metadata *getInlinedAt() const { return getOperand(4); }
- /// \brief Get an inlined version of this variable.
+ bool isArtificial() const { return getFlags() & FlagArtificial; }
+ bool isObjectPointer() const { return getFlags() & FlagObjectPointer; }
+
+ /// \brief Check that a location is valid for this variable.
///
- /// Returns a version of this with \a getAlinedAt() set to \c InlinedAt.
- MDLocalVariable *withInline(MDLocation *InlinedAt) const {
- if (InlinedAt == getInlinedAt())
- return const_cast<MDLocalVariable *>(this);
- auto Temp = clone();
- Temp->replaceOperandWith(4, InlinedAt);
- return replaceWithUniqued(std::move(Temp));
+ /// Check that \c DL exists, is in the same subprogram, and has the same
+ /// inlined-at location as \c this. (Otherwise, it's not a valid attachment
+ /// to a \a DbgInfoIntrinsic.)
+ bool isValidLocationForIntrinsic(const MDLocation *DL) const {
+ return DL && getScope()->getSubprogram() == DL->getScope()->getSubprogram();
}
- MDLocalVariable *withoutInline() const { return withInline(nullptr); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == MDLocalVariableKind;
@@ -1484,6 +1935,12 @@ public:
/// \brief DWARF expression.
///
+/// This is (almost) a DWARF expression that modifies the location of a
+/// variable (or the location of a single piece of a variable).
+///
+/// FIXME: Instead of DW_OP_plus taking an argument, this should use DW_OP_const
+/// and have DW_OP_plus consume the topmost elements on the stack.
+///
/// TODO: Co-allocate the expression elements.
/// TODO: Separate from MDNode, or otherwise drop Distinct and Temporary
/// storage types.
@@ -1496,7 +1953,7 @@ class MDExpression : public MDNode {
MDExpression(LLVMContext &C, StorageType Storage, ArrayRef<uint64_t> Elements)
: MDNode(C, MDExpressionKind, Storage, None),
Elements(Elements.begin(), Elements.end()) {}
- ~MDExpression() {}
+ ~MDExpression() = default;
static MDExpression *getImpl(LLVMContext &Context,
ArrayRef<uint64_t> Elements, StorageType Storage,
@@ -1519,6 +1976,15 @@ public:
return Elements[I];
}
+ /// \brief Return whether this is a piece of an aggregate variable.
+ bool isBitPiece() const;
+
+ /// \brief Return the offset of this piece in bits.
+ uint64_t getBitPieceOffset() const;
+
+ /// \brief Return the size of this piece in bits.
+ uint64_t getBitPieceSize() const;
+
typedef ArrayRef<uint64_t>::iterator element_iterator;
element_iterator elements_begin() const { return getElements().begin(); }
element_iterator elements_end() const { return getElements().end(); }
@@ -1573,6 +2039,13 @@ public:
return T;
}
+ /// \brief Get the next iterator.
+ ///
+ /// \a std::next() doesn't work because this is technically an
+ /// input_iterator, but it's a perfectly valid operation. This is an
+ /// accessor to provide the same functionality.
+ expr_op_iterator getNext() const { return ++expr_op_iterator(*this); }
+
bool operator==(const expr_op_iterator &X) const {
return getBase() == X.getBase();
}
@@ -1619,12 +2092,12 @@ class MDObjCProperty : public DebugNode {
: DebugNode(C, MDObjCPropertyKind, Storage, dwarf::DW_TAG_APPLE_property,
Ops),
Line(Line), Attributes(Attributes) {}
- ~MDObjCProperty() {}
+ ~MDObjCProperty() = default;
static MDObjCProperty *
- getImpl(LLVMContext &Context, StringRef Name, Metadata *File, unsigned Line,
+ getImpl(LLVMContext &Context, StringRef Name, MDFile *File, unsigned Line,
StringRef GetterName, StringRef SetterName, unsigned Attributes,
- Metadata *Type, StorageType Storage, bool ShouldCreate = true) {
+ MDType *Type, StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, getCanonicalMDString(Context, Name), File, Line,
getCanonicalMDString(Context, GetterName),
getCanonicalMDString(Context, SetterName), Attributes, Type,
@@ -1644,9 +2117,9 @@ class MDObjCProperty : public DebugNode {
public:
DEFINE_MDNODE_GET(MDObjCProperty,
- (StringRef Name, Metadata *File, unsigned Line,
+ (StringRef Name, MDFile *File, unsigned Line,
StringRef GetterName, StringRef SetterName,
- unsigned Attributes, Metadata *Type),
+ unsigned Attributes, MDType *Type),
(Name, File, Line, GetterName, SetterName, Attributes,
Type))
DEFINE_MDNODE_GET(MDObjCProperty,
@@ -1661,20 +2134,39 @@ public:
unsigned getLine() const { return Line; }
unsigned getAttributes() const { return Attributes; }
StringRef getName() const { return getStringOperand(0); }
- Metadata *getFile() const { return getOperand(1); }
+ MDFile *getFile() const { return cast_or_null<MDFile>(getRawFile()); }
StringRef getGetterName() const { return getStringOperand(2); }
StringRef getSetterName() const { return getStringOperand(3); }
- Metadata *getType() const { return getOperand(4); }
+
+ /// \brief Get the type.
+ ///
+ /// \note Objective-C doesn't have an ODR, so there is no benefit in storing
+ /// the type as a DITypeRef here.
+ MDType *getType() const { return cast_or_null<MDType>(getRawType()); }
+
+ StringRef getFilename() const {
+ if (auto *F = getFile())
+ return F->getFilename();
+ return "";
+ }
+ StringRef getDirectory() const {
+ if (auto *F = getFile())
+ return F->getDirectory();
+ return "";
+ }
MDString *getRawName() const { return getOperandAs<MDString>(0); }
+ Metadata *getRawFile() const { return getOperand(1); }
MDString *getRawGetterName() const { return getOperandAs<MDString>(2); }
MDString *getRawSetterName() const { return getOperandAs<MDString>(3); }
+ Metadata *getRawType() const { return getOperand(4); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == MDObjCPropertyKind;
}
};
+/// \brief An imported module (C++ using directive or similar).
class MDImportedEntity : public DebugNode {
friend class LLVMContextImpl;
friend class MDNode;
@@ -1684,10 +2176,10 @@ class MDImportedEntity : public DebugNode {
MDImportedEntity(LLVMContext &C, StorageType Storage, unsigned Tag,
unsigned Line, ArrayRef<Metadata *> Ops)
: DebugNode(C, MDImportedEntityKind, Storage, Tag, Ops), Line(Line) {}
- ~MDImportedEntity() {}
+ ~MDImportedEntity() = default;
static MDImportedEntity *getImpl(LLVMContext &Context, unsigned Tag,
- Metadata *Scope, Metadata *Entity,
+ MDScope *Scope, DebugNodeRef Entity,
unsigned Line, StringRef Name,
StorageType Storage,
bool ShouldCreate = true) {
@@ -1707,7 +2199,7 @@ class MDImportedEntity : public DebugNode {
public:
DEFINE_MDNODE_GET(MDImportedEntity,
- (unsigned Tag, Metadata *Scope, Metadata *Entity,
+ (unsigned Tag, MDScope *Scope, DebugNodeRef Entity,
unsigned Line, StringRef Name = ""),
(Tag, Scope, Entity, Line, Name))
DEFINE_MDNODE_GET(MDImportedEntity,
@@ -1718,10 +2210,12 @@ public:
TempMDImportedEntity clone() const { return cloneImpl(); }
unsigned getLine() const { return Line; }
- Metadata *getScope() const { return getOperand(0); }
- Metadata *getEntity() const { return getOperand(1); }
+ MDScope *getScope() const { return cast_or_null<MDScope>(getRawScope()); }
+ DebugNodeRef getEntity() const { return DebugNodeRef(getRawEntity()); }
StringRef getName() const { return getStringOperand(2); }
+ Metadata *getRawScope() const { return getOperand(0); }
+ Metadata *getRawEntity() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
static bool classof(const Metadata *MD) {
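For illustration, a rough usage sketch of the paired raw/typed accessors declared above; `SP` is a placeholder for an MDSubprogram obtained elsewhere, not a name introduced by this change:

    void describeSubprogram(const MDSubprogram *SP) {
      MDScopeRef Scope = SP->getScope();      // wraps getRawScope() (operand 1)
      MDSubroutineType *Ty = SP->getType();   // cast_or_null of getRawType() (operand 5)
      if (Function *F = SP->getFunction())    // dyn_casts the wrapped function constant
        (void)F->getName();
      (void)Scope;
      (void)Ty;
    }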
diff --git a/include/llvm/IR/DebugLoc.h b/include/llvm/IR/DebugLoc.h
index c29d5bf..f88a7b1 100644
--- a/include/llvm/IR/DebugLoc.h
+++ b/include/llvm/IR/DebugLoc.h
@@ -22,11 +22,15 @@ namespace llvm {
class LLVMContext;
class raw_ostream;
- class MDNode;
-
- /// DebugLoc - Debug location id. This is carried by Instruction, SDNode,
- /// and MachineInstr to compactly encode file/line/scope information for an
- /// operation.
+ class MDLocation;
+
+ /// \brief A debug info location.
+ ///
+ /// This class is a wrapper around a tracking reference to an \a MDLocation
+ /// pointer.
+ ///
+ /// To avoid extra includes, \a DebugLoc doubles the \a MDLocation API with
+ /// one based on relatively opaque \a MDNode pointers.
class DebugLoc {
TrackingMDNodeRef Loc;
@@ -43,64 +47,76 @@ namespace llvm {
return *this;
}
+ /// \brief Construct from an \a MDLocation.
+ DebugLoc(const MDLocation *L);
+
+ /// \brief Construct from an \a MDNode.
+ ///
+ /// Note: if \c N is not an \a MDLocation, a verifier check will fail, and
+ /// accessors will crash. However, construction from other nodes is
+ /// supported in order to handle forward references when reading textual
+ /// IR.
+ explicit DebugLoc(const MDNode *N);
+
+ /// \brief Get the underlying \a MDLocation.
+ ///
+ /// \pre !*this or \c isa<MDLocation>(getAsMDNode()).
+ /// @{
+ MDLocation *get() const;
+ operator MDLocation *() const { return get(); }
+ MDLocation *operator->() const { return get(); }
+ MDLocation &operator*() const { return *get(); }
+ /// @}
+
+ /// \brief Check for null.
+ ///
+ /// Check for null in a way that is safe with broken debug info. Unlike
+ /// the conversion to \c MDLocation, this doesn't require that \c Loc is of
+ /// the right type. Important for cases like \a llvm::StripDebugInfo() and
+ /// \a Instruction::hasMetadata().
+ explicit operator bool() const { return Loc; }
+
/// \brief Check whether this has a trivial destructor.
bool hasTrivialDestructor() const { return Loc.hasTrivialDestructor(); }
- /// get - Get a new DebugLoc that corresponds to the specified line/col
- /// scope/inline location.
- static DebugLoc get(unsigned Line, unsigned Col, MDNode *Scope,
- MDNode *InlinedAt = nullptr);
-
- /// getFromDILocation - Translate the DILocation quad into a DebugLoc.
- static DebugLoc getFromDILocation(MDNode *N);
-
- /// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc.
- static DebugLoc getFromDILexicalBlock(MDNode *N);
-
- /// isUnknown - Return true if this is an unknown location.
- bool isUnknown() const { return !Loc; }
+ /// \brief Create a new DebugLoc.
+ ///
+ /// Create a new DebugLoc at the specified line/col and scope/inline. This
+ /// forwards to \a MDLocation::get().
+ ///
+ /// If \c !Scope, returns a default-constructed \a DebugLoc.
+ ///
+ /// FIXME: Remove this. Users should use MDLocation::get().
+ static DebugLoc get(unsigned Line, unsigned Col, const MDNode *Scope,
+ const MDNode *InlinedAt = nullptr);
unsigned getLine() const;
unsigned getCol() const;
-
- /// getScope - This returns the scope pointer for this DebugLoc, or null if
- /// invalid.
MDNode *getScope() const;
- MDNode *getScope(const LLVMContext &) const { return getScope(); }
-
- /// getInlinedAt - This returns the InlinedAt pointer for this DebugLoc, or
- /// null if invalid or not present.
- MDNode *getInlinedAt() const;
- MDNode *getInlinedAt(const LLVMContext &) const { return getInlinedAt(); }
-
- /// getScopeAndInlinedAt - Return both the Scope and the InlinedAt values.
- void getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA) const;
- void getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,
- const LLVMContext &) const {
- return getScopeAndInlinedAt(Scope, IA);
- }
-
- /// getScopeNode - Get MDNode for DebugLoc's scope, or null if invalid.
- MDNode *getScopeNode() const;
- MDNode *getScopeNode(const LLVMContext &) const { return getScopeNode(); }
-
- // getFnDebugLoc - Walk up the scope chain of given debug loc and find line
- // number info for the function.
+ MDLocation *getInlinedAt() const;
+
+ /// \brief Get the fully inlined-at scope for a DebugLoc.
+ ///
+ /// Gets the inlined-at scope for a DebugLoc.
+ MDNode *getInlinedAtScope() const;
+
+ /// \brief Find the debug info location for the start of the function.
+ ///
+ /// Walk up the scope chain of given debug loc and find line number info
+ /// for the function.
+ ///
+ /// FIXME: Remove this. Users should use MDLocation/MDLocalScope API to
+ /// find the subprogram, and then MDLocation::get().
DebugLoc getFnDebugLoc() const;
- DebugLoc getFnDebugLoc(const LLVMContext &) const {
- return getFnDebugLoc();
- }
- /// getAsMDNode - This method converts the compressed DebugLoc node into a
- /// DILocation compatible MDNode.
- MDNode *getAsMDNode() const;
- MDNode *getAsMDNode(LLVMContext &) const { return getAsMDNode(); }
+ /// \brief Return \c this as a bare \a MDNode.
+ MDNode *getAsMDNode() const { return Loc; }
bool operator==(const DebugLoc &DL) const { return Loc == DL.Loc; }
- bool operator!=(const DebugLoc &DL) const { return !(*this == DL); }
+ bool operator!=(const DebugLoc &DL) const { return Loc != DL.Loc; }
void dump() const;
- void dump(const LLVMContext &) const { dump(); }
+
/// \brief prints source location /path/to/file.exe:line:col @[inlined at]
void print(raw_ostream &OS) const;
};
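For illustration, a minimal sketch of the reworked DebugLoc in use; `I` is a placeholder Instruction pointer, not something added by this change:

    if (DebugLoc DL = I->getDebugLoc()) {   // explicit operator bool replaces isUnknown()
      unsigned Line = DL.getLine();
      MDLocation *Loc = DL.get();           // the underlying MDLocation
      MDLocation *IA = DL.getInlinedAt();   // typed inlined-at accessor
      (void)Line; (void)Loc; (void)IA;
    }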
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index 0cd5afb..a5eed9b 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -31,26 +31,6 @@ namespace llvm {
class FunctionType;
class LLVMContext;
-// Traits for intrusive list of basic blocks...
-template<> struct ilist_traits<BasicBlock>
- : public SymbolTableListTraits<BasicBlock, Function> {
-
- // createSentinel is used to get hold of the node that marks the end of the
- // list... (same trick used here as in ilist_traits<Instruction>)
- BasicBlock *createSentinel() const {
- return static_cast<BasicBlock*>(&Sentinel);
- }
- static void destroySentinel(BasicBlock*) {}
-
- BasicBlock *provideInitialHead() const { return createSentinel(); }
- BasicBlock *ensureHead(BasicBlock*) const { return createSentinel(); }
- static void noteHead(BasicBlock*, BasicBlock*) {}
-
- static ValueSymbolTable *getSymTab(Function *ItemParent);
-private:
- mutable ilist_half_node<BasicBlock> Sentinel;
-};
-
template<> struct ilist_traits<Argument>
: public SymbolTableListTraits<Argument, Function> {
@@ -86,6 +66,7 @@ private:
mutable ArgumentListType ArgumentList; ///< The formal arguments
ValueSymbolTable *SymTab; ///< Symbol table of args/instructions
AttributeSet AttributeSets; ///< Parameter attributes
+ FunctionType *Ty;
/*
* Value::SubclassData
@@ -133,7 +114,7 @@ public:
return new(0) Function(Ty, Linkage, N, M);
}
- ~Function();
+ ~Function() override;
Type *getReturnType() const; // Return the type of the ret val
FunctionType *getFunctionType() const; // Return the FunctionType for me
@@ -242,6 +223,10 @@ public:
/// @brief adds the dereferenceable attribute to the list of attributes.
void addDereferenceableAttr(unsigned i, uint64_t Bytes);
+ /// @brief adds the dereferenceable_or_null attribute to the list of
+ /// attributes.
+ void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
+
/// @brief Extract the alignment for a call or parameter (0=unknown).
unsigned getParamAlignment(unsigned i) const {
return AttributeSets.getParamAlignment(i);
@@ -472,6 +457,10 @@ public:
Constant *getPrologueData() const;
void setPrologueData(Constant *PrologueData);
+ /// Print the function to an output stream with an optional
+ /// AssemblyAnnotationWriter.
+ void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr) const;
+
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
/// program, displaying the CFG of the current function with the code for each
diff --git a/include/llvm/IR/GVMaterializer.h b/include/llvm/IR/GVMaterializer.h
index ae2f2e1..779f2e0 100644
--- a/include/llvm/IR/GVMaterializer.h
+++ b/include/llvm/IR/GVMaterializer.h
@@ -54,6 +54,7 @@ public:
virtual std::error_code MaterializeModule(Module *M) = 0;
virtual std::error_code materializeMetadata() = 0;
+ virtual void setStripDebugInfo() = 0;
virtual std::vector<StructType *> getIdentifiedStructTypes() const = 0;
};
diff --git a/include/llvm/IR/GlobalObject.h b/include/llvm/IR/GlobalObject.h
index 50deb08..f055241 100644
--- a/include/llvm/IR/GlobalObject.h
+++ b/include/llvm/IR/GlobalObject.h
@@ -27,7 +27,7 @@ class GlobalObject : public GlobalValue {
GlobalObject(const GlobalObject &) = delete;
protected:
- GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
+ GlobalObject(PointerType *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
LinkageTypes Linkage, const Twine &Name)
: GlobalValue(Ty, VTy, Ops, NumOps, Linkage, Name), ObjComdat(nullptr) {
setGlobalValueSubClassData(0);
diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h
index 002e5e7..aeaaef4 100644
--- a/include/llvm/IR/GlobalValue.h
+++ b/include/llvm/IR/GlobalValue.h
@@ -61,7 +61,7 @@ public:
};
protected:
- GlobalValue(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
+ GlobalValue(PointerType *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
LinkageTypes Linkage, const Twine &Name)
: Constant(Ty, VTy, Ops, NumOps), Linkage(Linkage),
Visibility(DefaultVisibility), UnnamedAddr(0),
@@ -104,7 +104,7 @@ public:
LocalExecTLSModel
};
- ~GlobalValue() {
+ ~GlobalValue() override {
removeDeadConstantUsers(); // remove any dead constants using this.
}
@@ -165,9 +165,9 @@ public:
const char *getSection() const;
/// Global values are always pointers.
- inline PointerType *getType() const {
- return cast<PointerType>(User::getType());
- }
+ PointerType *getType() const { return cast<PointerType>(User::getType()); }
+
+ Type *getValueType() const { return getType()->getElementType(); }
static LinkageTypes getLinkOnceLinkage(bool ODR) {
return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage;
@@ -343,11 +343,11 @@ public:
virtual void eraseFromParent() = 0;
/// Get the module that this global value is contained inside of...
- inline Module *getParent() { return Parent; }
- inline const Module *getParent() const { return Parent; }
+ Module *getParent() { return Parent; }
+ const Module *getParent() const { return Parent; }
// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
+ static bool classof(const Value *V) {
return V->getValueID() == Value::FunctionVal ||
V->getValueID() == Value::GlobalVariableVal ||
V->getValueID() == Value::GlobalAliasVal;
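A short sketch of the new getValueType() shorthand; `GV` stands in for any global value (placeholder name):

    // Global values are always pointers; getValueType() returns the pointee type.
    Type *Pointee = GV->getValueType();
    assert(Pointee == GV->getType()->getElementType() && "same element type");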
diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h
index d7b81a2..9f57705 100644
--- a/include/llvm/IR/GlobalVariable.h
+++ b/include/llvm/IR/GlobalVariable.h
@@ -66,7 +66,7 @@ public:
ThreadLocalMode = NotThreadLocal, unsigned AddressSpace = 0,
bool isExternallyInitialized = false);
- ~GlobalVariable() {
+ ~GlobalVariable() override {
NumOperands = 1; // FIXME: needed by operator delete
}
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index 617e2bc..9c4ba07 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -21,6 +21,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/ConstantFolder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
@@ -123,7 +124,7 @@ public:
/// \brief If this builder has a current debug location, set it on the
/// specified instruction.
void SetInstDebugLocation(Instruction *I) const {
- if (!CurDbgLocation.isUnknown())
+ if (CurDbgLocation)
I->setDebugLoc(CurDbgLocation);
}
@@ -243,7 +244,7 @@ public:
/// filled in with the null terminated string value specified. The new global
/// variable will be marked mergable with any others of the same contents. If
/// Name is specified, it is the name of the global variable created.
- Value *CreateGlobalString(StringRef Str, const Twine &Name = "");
+ GlobalVariable *CreateGlobalString(StringRef Str, const Twine &Name = "");
/// \brief Get a constant value representing either true or false.
ConstantInt *getInt1(bool V) {
@@ -1028,12 +1029,16 @@ public:
if (!isa<Constant>(IdxList[i]))
break;
if (i == e)
- return Insert(Folder.CreateGetElementPtr(PC, IdxList), Name);
+ return Insert(Folder.CreateGetElementPtr(Ty, PC, IdxList), Name);
}
return Insert(GetElementPtrInst::Create(Ty, Ptr, IdxList), Name);
}
Value *CreateInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList,
const Twine &Name = "") {
+ return CreateInBoundsGEP(nullptr, Ptr, IdxList, Name);
+ }
+ Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
+ const Twine &Name = "") {
if (Constant *PC = dyn_cast<Constant>(Ptr)) {
// Every index must be constant.
size_t i, e;
@@ -1041,68 +1046,77 @@ public:
if (!isa<Constant>(IdxList[i]))
break;
if (i == e)
- return Insert(Folder.CreateInBoundsGetElementPtr(PC, IdxList), Name);
+ return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, IdxList),
+ Name);
}
- return Insert(GetElementPtrInst::CreateInBounds(nullptr, Ptr, IdxList), Name);
+ return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, IdxList), Name);
}
Value *CreateGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
+ return CreateGEP(nullptr, Ptr, Idx, Name);
+ }
+ Value *CreateGEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "") {
if (Constant *PC = dyn_cast<Constant>(Ptr))
if (Constant *IC = dyn_cast<Constant>(Idx))
- return Insert(Folder.CreateGetElementPtr(PC, IC), Name);
- return Insert(GetElementPtrInst::Create(nullptr, Ptr, Idx), Name);
+ return Insert(Folder.CreateGetElementPtr(Ty, PC, IC), Name);
+ return Insert(GetElementPtrInst::Create(Ty, Ptr, Idx), Name);
}
- Value *CreateInBoundsGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
+ Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, Value *Idx,
+ const Twine &Name = "") {
if (Constant *PC = dyn_cast<Constant>(Ptr))
if (Constant *IC = dyn_cast<Constant>(Idx))
- return Insert(Folder.CreateInBoundsGetElementPtr(PC, IC), Name);
- return Insert(GetElementPtrInst::CreateInBounds(nullptr, Ptr, Idx), Name);
+ return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, IC), Name);
+ return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name);
}
Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name = "") {
+ return CreateConstGEP1_32(nullptr, Ptr, Idx0, Name);
+ }
+ Value *CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0,
+ const Twine &Name = "") {
Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateGetElementPtr(PC, Idx), Name);
+ return Insert(Folder.CreateGetElementPtr(Ty, PC, Idx), Name);
- return Insert(GetElementPtrInst::Create(nullptr, Ptr, Idx), Name);
+ return Insert(GetElementPtrInst::Create(Ty, Ptr, Idx), Name);
}
- Value *CreateConstInBoundsGEP1_32(Value *Ptr, unsigned Idx0,
+ Value *CreateConstInBoundsGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0,
const Twine &Name = "") {
Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idx), Name);
+ return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idx), Name);
- return Insert(GetElementPtrInst::CreateInBounds(nullptr, Ptr, Idx), Name);
+ return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name);
}
- Value *CreateConstGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1,
- const Twine &Name = "") {
+ Value *CreateConstGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0, unsigned Idx1,
+ const Twine &Name = "") {
Value *Idxs[] = {
ConstantInt::get(Type::getInt32Ty(Context), Idx0),
ConstantInt::get(Type::getInt32Ty(Context), Idx1)
};
if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateGetElementPtr(PC, Idxs), Name);
+ return Insert(Folder.CreateGetElementPtr(Ty, PC, Idxs), Name);
- return Insert(GetElementPtrInst::Create(nullptr, Ptr, Idxs), Name);
+ return Insert(GetElementPtrInst::Create(Ty, Ptr, Idxs), Name);
}
- Value *CreateConstInBoundsGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1,
- const Twine &Name = "") {
+ Value *CreateConstInBoundsGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0,
+ unsigned Idx1, const Twine &Name = "") {
Value *Idxs[] = {
ConstantInt::get(Type::getInt32Ty(Context), Idx0),
ConstantInt::get(Type::getInt32Ty(Context), Idx1)
};
if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idxs), Name);
+ return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idxs), Name);
- return Insert(GetElementPtrInst::CreateInBounds(nullptr, Ptr, Idxs), Name);
+ return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idxs), Name);
}
Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateGetElementPtr(PC, Idx), Name);
+ return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idx), Name);
return Insert(GetElementPtrInst::Create(nullptr, Ptr, Idx), Name);
}
@@ -1111,7 +1125,7 @@ public:
Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idx), Name);
+ return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idx), Name);
return Insert(GetElementPtrInst::CreateInBounds(nullptr, Ptr, Idx), Name);
}
@@ -1123,7 +1137,7 @@ public:
};
if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateGetElementPtr(PC, Idxs), Name);
+ return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idxs), Name);
return Insert(GetElementPtrInst::Create(nullptr, Ptr, Idxs), Name);
}
@@ -1135,21 +1149,23 @@ public:
};
if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idxs), Name);
+ return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idxs),
+ Name);
return Insert(GetElementPtrInst::CreateInBounds(nullptr, Ptr, Idxs), Name);
}
- Value *CreateStructGEP(Value *Ptr, unsigned Idx, const Twine &Name = "") {
- return CreateConstInBoundsGEP2_32(Ptr, 0, Idx, Name);
+ Value *CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx,
+ const Twine &Name = "") {
+ return CreateConstInBoundsGEP2_32(Ty, Ptr, 0, Idx, Name);
}
/// \brief Same as CreateGlobalString, but return a pointer with "i8*" type
/// instead of a pointer to array of i8.
Value *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "") {
- Value *gv = CreateGlobalString(Str, Name);
+ GlobalVariable *gv = CreateGlobalString(Str, Name);
Value *zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
Value *Args[] = { zero, zero };
- return CreateInBoundsGEP(gv, Args, Name);
+ return CreateInBoundsGEP(gv->getValueType(), gv, Args, Name);
}
//===--------------------------------------------------------------------===//
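For illustration, a sketch of the explicitly-typed GEP builders above; `B` (an IRBuilder<>) and `GV` (a global created earlier) are placeholder names from the caller:

    // The pointee type is now passed explicitly rather than read off the pointer type.
    Value *Slot = B.CreateConstInBoundsGEP2_32(GV->getValueType(), GV, 0, 3, "slot");
    Value *Str  = B.CreateGlobalStringPtr("hello"); // internally GEPs via gv->getValueType()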
diff --git a/include/llvm/IR/IRPrintingPasses.h b/include/llvm/IR/IRPrintingPasses.h
index 7f2027b..5f1d56f 100644
--- a/include/llvm/IR/IRPrintingPasses.h
+++ b/include/llvm/IR/IRPrintingPasses.h
@@ -34,7 +34,8 @@ class raw_ostream;
/// \brief Create and return a pass that writes the module to the specified
/// \c raw_ostream.
ModulePass *createPrintModulePass(raw_ostream &OS,
- const std::string &Banner = "");
+ const std::string &Banner = "",
+ bool ShouldPreserveUseListOrder = false);
/// \brief Create and return a pass that prints functions to the specified
/// \c raw_ostream as they are processed.
@@ -53,10 +54,12 @@ BasicBlockPass *createPrintBasicBlockPass(raw_ostream &OS,
class PrintModulePass {
raw_ostream &OS;
std::string Banner;
+ bool ShouldPreserveUseListOrder;
public:
PrintModulePass();
- PrintModulePass(raw_ostream &OS, const std::string &Banner = "");
+ PrintModulePass(raw_ostream &OS, const std::string &Banner = "",
+ bool ShouldPreserveUseListOrder = false);
PreservedAnalyses run(Module &M);
diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h
index 6ae4122..c8f25e7 100644
--- a/include/llvm/IR/InlineAsm.h
+++ b/include/llvm/IR/InlineAsm.h
@@ -51,7 +51,7 @@ private:
InlineAsm(PointerType *Ty, const std::string &AsmString,
const std::string &Constraints, bool hasSideEffects,
bool isAlignStack, AsmDialect asmDialect);
- virtual ~InlineAsm();
+ ~InlineAsm() override;
/// When the ConstantUniqueMap merges two types and makes two InlineAsms
/// identical, it destroys one of them with this method.
diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h
index 3a33f43..108b9eb 100644
--- a/include/llvm/IR/InstrTypes.h
+++ b/include/llvm/IR/InstrTypes.h
@@ -44,7 +44,7 @@ protected:
: Instruction(Ty, iType, Ops, NumOps, InsertAtEnd) {}
// Out of line virtual method, so the vtable, etc has a home.
- ~TerminatorInst();
+ ~TerminatorInst() override;
/// Virtual methods - Terminators should overload these and provide inline
/// overrides of non-V methods.
@@ -102,7 +102,7 @@ public:
}
// Out of line virtual method, so the vtable, etc has a home.
- ~UnaryInstruction();
+ ~UnaryInstruction() override;
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h
index dc96b57..9dd16fd 100644
--- a/include/llvm/IR/Instruction.h
+++ b/include/llvm/IR/Instruction.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/SymbolTableListTraits.h"
#include "llvm/IR/User.h"
namespace llvm {
@@ -25,10 +26,27 @@ namespace llvm {
class FastMathFlags;
class LLVMContext;
class MDNode;
+class BasicBlock;
struct AAMDNodes;
-template<typename ValueSubClass, typename ItemParentClass>
- class SymbolTableListTraits;
+template <>
+struct ilist_traits<Instruction>
+ : public SymbolTableListTraits<Instruction, BasicBlock> {
+
+ /// \brief Return a node that marks the end of a list.
+ ///
+ /// The sentinel is relative to this instance, so we use a non-static
+ /// method.
+ Instruction *createSentinel() const;
+ static void destroySentinel(Instruction *) {}
+
+ Instruction *provideInitialHead() const { return createSentinel(); }
+ Instruction *ensureHead(Instruction *) const { return createSentinel(); }
+ static void noteHead(Instruction *, Instruction *) {}
+
+private:
+ mutable ilist_half_node<Instruction> Sentinel;
+};
class Instruction : public User, public ilist_node<Instruction> {
void operator=(const Instruction &) = delete;
@@ -44,7 +62,7 @@ class Instruction : public User, public ilist_node<Instruction> {
};
public:
// Out of line virtual method, so the vtable, etc has a home.
- ~Instruction();
+ ~Instruction() override;
/// user_back - Specialize the methods defined in Value, as we know that an
/// instruction can only be used by other instructions.
@@ -69,7 +87,8 @@ public:
/// eraseFromParent - This method unlinks 'this' from the containing basic
/// block and deletes it.
///
- void eraseFromParent();
+ /// \returns an iterator pointing to the element after the erased one
+ iplist<Instruction>::iterator eraseFromParent();
/// insertBefore - Insert an unlinked instructions into a basic block
/// immediately before the specified instruction.
@@ -134,9 +153,7 @@ public:
/// hasMetadata() - Return true if this instruction has any metadata attached
/// to it.
- bool hasMetadata() const {
- return !DbgLoc.isUnknown() || hasMetadataHashEntry();
- }
+ bool hasMetadata() const { return DbgLoc || hasMetadataHashEntry(); }
/// hasMetadataOtherThanDebugLoc - Return true if this instruction has
/// metadata attached to it other than a debug location.
@@ -495,6 +512,17 @@ protected:
};
+inline Instruction *ilist_traits<Instruction>::createSentinel() const {
+ // Since i(p)lists always publicly derive from their corresponding traits,
+ // placing a data member in this class will augment the i(p)list. But since
+ // the NodeTy is expected to publicly derive from ilist_node<NodeTy>,
+ // there is a legal viable downcast from it to NodeTy. We use this trick to
+ // superimpose an i(p)list with a "ghostly" NodeTy, which becomes the
+ // sentinel. Dereferencing the sentinel is forbidden (save the
+ // ilist_node<NodeTy>), so no one will ever notice the superposition.
+ return static_cast<Instruction *>(&Sentinel);
+}
+
// Instruction* is only 4-byte aligned.
template<>
class PointerLikeTypeTraits<Instruction*> {
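For illustration, how the iterator-returning erase supports in-loop removal; `BB` and `isDead()` are placeholders, not part of this change:

    for (auto It = BB.begin(), E = BB.end(); It != E;) {
      if (isDead(*It))
        It = It->eraseFromParent();   // yields the iterator after the erased instruction
      else
        ++It;
    }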
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index 52fa360..1f2ca30 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -91,7 +91,7 @@ public:
const Twine &Name, BasicBlock *InsertAtEnd);
// Out of line virtual method, so the vtable, etc. has a home.
- virtual ~AllocaInst();
+ ~AllocaInst() override;
/// isArrayAllocation - Return true if there is an allocation size parameter
/// to the allocation instruction that is not 1.
@@ -180,7 +180,12 @@ public:
unsigned Align, Instruction *InsertBefore = nullptr);
LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
unsigned Align, BasicBlock *InsertAtEnd);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ AtomicOrdering Order, SynchronizationScope SynchScope = CrossThread,
+ Instruction *InsertBefore = nullptr)
+ : LoadInst(cast<PointerType>(Ptr->getType())->getElementType(), Ptr,
+ NameStr, isVolatile, Align, Order, SynchScope, InsertBefore) {}
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
unsigned Align, AtomicOrdering Order,
SynchronizationScope SynchScope = CrossThread,
Instruction *InsertBefore = nullptr);
@@ -882,9 +887,9 @@ public:
/// Null is returned if the indices are invalid for the specified
/// pointer type.
///
- static Type *getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList);
- static Type *getIndexedType(Type *Ptr, ArrayRef<Constant *> IdxList);
- static Type *getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList);
+ static Type *getIndexedType(Type *Ty, ArrayRef<Value *> IdxList);
+ static Type *getIndexedType(Type *Ty, ArrayRef<Constant *> IdxList);
+ static Type *getIndexedType(Type *Ty, ArrayRef<uint64_t> IdxList);
inline op_iterator idx_begin() { return op_begin()+1; }
inline const_op_iterator idx_begin() const { return op_begin()+1; }
@@ -915,9 +920,12 @@ public:
/// GetGEPReturnType - Returns the pointer type returned by the GEP
/// instruction, which may be a vector of pointers.
static Type *getGEPReturnType(Value *Ptr, ArrayRef<Value *> IdxList) {
- Type *PtrTy = PointerType::get(checkGEPType(
- getIndexedType(Ptr->getType(), IdxList)),
- Ptr->getType()->getPointerAddressSpace());
+ Type *PtrTy =
+ PointerType::get(checkGEPType(getIndexedType(
+ cast<PointerType>(Ptr->getType()->getScalarType())
+ ->getElementType(),
+ IdxList)),
+ Ptr->getType()->getPointerAddressSpace());
// Vector GEP
if (Ptr->getType()->isVectorTy()) {
unsigned NumElem = cast<VectorType>(Ptr->getType())->getNumElements();
@@ -1327,7 +1335,11 @@ public:
static Instruction* CreateFree(Value* Source, Instruction *InsertBefore);
static Instruction* CreateFree(Value* Source, BasicBlock *InsertAtEnd);
- ~CallInst();
+ ~CallInst() override;
+
+ Type *getFunctionType() const {
+ return cast<PointerType>(getCalledValue()->getType())->getElementType();
+ }
// Note that 'musttail' implies 'tail'.
enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2 };
@@ -1404,6 +1416,10 @@ public:
/// \brief adds the dereferenceable attribute to the list of attributes.
void addDereferenceableAttr(unsigned i, uint64_t Bytes);
+ /// \brief adds the dereferenceable_or_null attribute to the list of
+ /// attributes.
+ void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
+
/// \brief Determine whether this call has the given attribute.
bool hasFnAttr(Attribute::AttrKind A) const {
assert(A != Attribute::NoBuiltin &&
@@ -2167,7 +2183,7 @@ public:
const Twine &NameStr, BasicBlock *InsertAtEnd) {
return new PHINode(Ty, NumReservedValues, NameStr, InsertAtEnd);
}
- ~PHINode();
+ ~PHINode() override;
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -2358,7 +2374,7 @@ public:
static LandingPadInst *Create(Type *RetTy, Value *PersonalityFn,
unsigned NumReservedClauses,
const Twine &NameStr, BasicBlock *InsertAtEnd);
- ~LandingPadInst();
+ ~LandingPadInst() override;
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -2460,7 +2476,7 @@ public:
static ReturnInst* Create(LLVMContext &C, BasicBlock *InsertAtEnd) {
return new(0) ReturnInst(C, InsertAtEnd);
}
- virtual ~ReturnInst();
+ ~ReturnInst() override;
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -2757,7 +2773,7 @@ public:
return new SwitchInst(Value, Default, NumCases, InsertAtEnd);
}
- ~SwitchInst();
+ ~SwitchInst() override;
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -2943,7 +2959,7 @@ public:
BasicBlock *InsertAtEnd) {
return new IndirectBrInst(Address, NumDests, InsertAtEnd);
}
- ~IndirectBrInst();
+ ~IndirectBrInst() override;
/// Provide fast operand accessors.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -3093,9 +3109,13 @@ public:
/// removeAttribute - removes the attribute from the list of attributes.
void removeAttribute(unsigned i, Attribute attr);
- /// \brief removes the dereferenceable attribute to the list of attributes.
+ /// \brief adds the dereferenceable attribute to the list of attributes.
void addDereferenceableAttr(unsigned i, uint64_t Bytes);
+ /// \brief adds the dereferenceable_or_null attribute to the list of
+ /// attributes.
+ void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
+
/// \brief Determine whether this call has the given attribute.
bool hasFnAttr(Attribute::AttrKind A) const {
assert(A != Attribute::NoBuiltin &&
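A brief sketch of the pointee-type helpers touched above; `CI`, `Ptr`, and `Idxs` are placeholder values assumed to exist in the caller:

    Type *CalleeTy = CI->getFunctionType();   // element type of the called value's pointer type
    // getIndexedType now takes the source element type instead of the pointer type:
    Type *EltTy = GetElementPtrInst::getIndexedType(
        cast<PointerType>(Ptr->getType()->getScalarType())->getElementType(), Idxs);
    (void)CalleeTy; (void)EltTy;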
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index da9d8cb..4052a31 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -421,10 +421,6 @@ def int_eh_endcatch : Intrinsic<[], []>;
// Represents the list of actions to take when an exception is thrown.
def int_eh_actions : Intrinsic<[llvm_ptr_ty], [llvm_vararg_ty], []>;
-// Designates the provided static alloca as the unwind help object. Required
-// for WinEH.
-def int_eh_unwindhelp : Intrinsic<[], [llvm_ptr_ty], []>;
-
// __builtin_unwind_init is an undocumented GCC intrinsic that causes all
// callee-saved registers to be saved and restored (regardless of whether they
// are used) in the calling function. It is used by libgcc_eh.
@@ -630,3 +626,4 @@ include "llvm/IR/IntrinsicsNVVM.td"
include "llvm/IR/IntrinsicsMips.td"
include "llvm/IR/IntrinsicsR600.td"
include "llvm/IR/IntrinsicsBPF.td"
+include "llvm/IR/IntrinsicsSystemZ.td"
diff --git a/include/llvm/IR/IntrinsicsBPF.td b/include/llvm/IR/IntrinsicsBPF.td
index 6b5110b..94eca8e 100644
--- a/include/llvm/IR/IntrinsicsBPF.td
+++ b/include/llvm/IR/IntrinsicsBPF.td
@@ -19,4 +19,6 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf."
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
def int_bpf_load_word : GCCBuiltin<"__builtin_bpf_load_word">,
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
+ def int_bpf_pseudo : GCCBuiltin<"__builtin_bpf_pseudo">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>;
}
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index 95fc3e5..74c0172 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -37,6 +37,25 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// generated by the PowerPC backend!
def int_ppc_mtctr : Intrinsic<[], [llvm_anyint_ty], []>;
def int_ppc_is_decremented_ctr_nonzero : Intrinsic<[llvm_i1_ty], [], []>;
+
+ // Intrinsics for [double]word extended forms of divide instructions
+ def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_divweu : GCCBuiltin<"__builtin_divweu">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_divde : GCCBuiltin<"__builtin_divde">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+ def int_ppc_divdeu : GCCBuiltin<"__builtin_divdeu">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+
+ // Bit permute doubleword
+ def int_ppc_bpermd : GCCBuiltin<"__builtin_bpermd">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
}
@@ -563,11 +582,12 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-// These need diagnostics for invalid arguments so don't inherit from GCCBuiltin
def int_ppc_altivec_crypto_vshasigmad :
+ GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vshasigmaw :
+ GCCBuiltin<"__builtin_altivec_crypto_vshasigmaw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
}
@@ -801,20 +821,20 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
-def int_ppc_tbegin :
+def int_ppc_tbegin : GCCBuiltin<"__builtin_tbegin">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
-def int_ppc_tend :
+def int_ppc_tend : GCCBuiltin<"__builtin_tend">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
def int_ppc_tabort : GCCBuiltin<"__builtin_tabort">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
-def int_ppc_tabortwc :
+def int_ppc_tabortwc : GCCBuiltin<"__builtin_tabortwc">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-def int_ppc_tabortwci :
+def int_ppc_tabortwci : GCCBuiltin<"__builtin_tabortwci">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-def int_ppc_tabortdc :
+def int_ppc_tabortdc : GCCBuiltin<"__builtin_tabortdc">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-def int_ppc_tabortdci :
+def int_ppc_tabortdci : GCCBuiltin<"__builtin_tabortdci">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_ppc_tcheck : GCCBuiltin<"__builtin_tcheck">,
@@ -823,7 +843,7 @@ def int_ppc_treclaim : GCCBuiltin<"__builtin_treclaim">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
def int_ppc_trechkpt : GCCBuiltin<"__builtin_trechkpt">,
Intrinsic<[llvm_i32_ty], [], []>;
-def int_ppc_tsr :
+def int_ppc_tsr : GCCBuiltin<"__builtin_tsr">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
def int_ppc_get_texasr : GCCBuiltin<"__builtin_get_texasr">,
diff --git a/include/llvm/IR/IntrinsicsSystemZ.td b/include/llvm/IR/IntrinsicsSystemZ.td
new file mode 100644
index 0000000..6883db3
--- /dev/null
+++ b/include/llvm/IR/IntrinsicsSystemZ.td
@@ -0,0 +1,46 @@
+//===- IntrinsicsSystemZ.td - Defines SystemZ intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the SystemZ-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Transactional-execution intrinsics
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "s390" in {
+ def int_s390_tbegin : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoDuplicate]>;
+
+ def int_s390_tbegin_nofloat : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoDuplicate]>;
+
+ def int_s390_tbeginc : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoDuplicate]>;
+
+ def int_s390_tabort : Intrinsic<[], [llvm_i64_ty],
+ [IntrNoReturn, Throws]>;
+
+ def int_s390_tend : GCCBuiltin<"__builtin_tend">,
+ Intrinsic<[llvm_i32_ty], []>;
+
+ def int_s390_etnd : GCCBuiltin<"__builtin_tx_nesting_depth">,
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+
+ def int_s390_ntstg : Intrinsic<[], [llvm_i64_ty, llvm_ptr64_ty],
+ [IntrReadWriteArgMem]>;
+
+ def int_s390_ppa_txassist : GCCBuiltin<"__builtin_tx_assist">,
+ Intrinsic<[], [llvm_i32_ty]>;
+}
+
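A rough sketch of how the transactional-execution intrinsics above might be exercised from C++ via the corresponding builtins (assumes a zEC12-or-later target built with -mhtm; __builtin_tbegin itself is presumably wired up in the frontend, since its intrinsic carries no GCCBuiltin mapping, while __builtin_tend and __builtin_tx_nesting_depth map directly to llvm.s390.tend and llvm.s390.etnd).

// Hedged sketch (C++): transactional increment using the z/Architecture HTM builtins.
int transactional_increment(int *Counter) {
  if (__builtin_tbegin(nullptr) == 0) {   // condition code 0: transaction started
    ++*Counter;                           // stores here are transactional
    __builtin_tend();                     // commit; lowers to llvm.s390.tend
    return 0;
  }
  return __builtin_tx_nesting_depth();    // lowers to llvm.s390.etnd
}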
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 39b8e3b..49231d8 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -1408,12 +1408,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx512_mask_pmulu_dq_512 : GCCBuiltin<"__builtin_ia32_pmuludq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Vector min, max
@@ -3093,7 +3087,27 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty],
[IntrNoMem]>;
}
-
+// Bitwise Ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+ llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pand_q_512 : GCCBuiltin<"__builtin_ia32_pandq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_por_d_512 : GCCBuiltin<"__builtin_ia32_pord512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+ llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_por_q_512 : GCCBuiltin<"__builtin_ia32_porq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pxor_d_512 : GCCBuiltin<"__builtin_ia32_pxord512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+ llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pxor_q_512 : GCCBuiltin<"__builtin_ia32_pxorq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+}
// Arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
@@ -3222,6 +3236,27 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[IntrNoMem]>;
}
+// Integer arithmetic ops
+let TargetPrefix = "x86" in {
+ def int_x86_avx512_mask_padd_d_512 : GCCBuiltin<"__builtin_ia32_paddd512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+ llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_padd_q_512 : GCCBuiltin<"__builtin_ia32_paddq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psub_d_512 : GCCBuiltin<"__builtin_ia32_psubd512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+ llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psub_q_512 : GCCBuiltin<"__builtin_ia32_psubq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmulu_dq_512 : GCCBuiltin<"__builtin_ia32_pmuludq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+}
// Gather and Scatter ops
let TargetPrefix = "x86" in {
def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gathersiv8df">,
@@ -3813,14 +3848,6 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty],
- [IntrNoMem]>;
- def int_x86_avx512_mask_pand_q_512 : GCCBuiltin<"__builtin_ia32_pandq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">,
Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
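For illustration, a hedged sketch of the operand order the masked 512-bit integer builtins above expect, mirroring the intrinsic signatures: two sources, a pass-through vector, and a write mask. The vector typedef and the assumption that the mask is an unsigned short follow the i16 mask type in the declaration; compiling this would require an AVX-512F-enabled target.

// Hedged sketch (C++): masked 512-bit integer add via the builtin named above.
typedef int v16si __attribute__((vector_size(64)));
v16si masked_add_512(v16si A, v16si B, v16si PassThru, unsigned short Mask) {
  // (src1, src2, pass-through, mask) as in the llvm.x86.avx512.mask.padd.d.512 signature.
  return __builtin_ia32_paddd512_mask(A, B, PassThru, Mask);
}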
diff --git a/include/llvm/IR/LegacyPassManager.h b/include/llvm/IR/LegacyPassManager.h
index 6c04e9d..5257a0e 100644
--- a/include/llvm/IR/LegacyPassManager.h
+++ b/include/llvm/IR/LegacyPassManager.h
@@ -50,7 +50,7 @@ class PassManager : public PassManagerBase {
public:
PassManager();
- ~PassManager();
+ ~PassManager() override;
void add(Pass *P) override;
@@ -70,7 +70,7 @@ public:
/// FunctionPassManager ctor - This initializes the pass manager. It needs,
/// but does not take ownership of, the specified Module.
explicit FunctionPassManager(Module *M);
- ~FunctionPassManager();
+ ~FunctionPassManager() override;
void add(Pass *P) override;
diff --git a/include/llvm/IR/LegacyPassNameParser.h b/include/llvm/IR/LegacyPassNameParser.h
index 52db1c3..39ae80d 100644
--- a/include/llvm/IR/LegacyPassNameParser.h
+++ b/include/llvm/IR/LegacyPassNameParser.h
@@ -43,7 +43,7 @@ class PassNameParser : public PassRegistrationListener,
public cl::parser<const PassInfo*> {
public:
PassNameParser(cl::Option &O);
- virtual ~PassNameParser();
+ ~PassNameParser() override;
void initialize() {
cl::parser<const PassInfo*>::initialize();
diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h
index a31bdbf..03e70fe 100644
--- a/include/llvm/IR/Metadata.h
+++ b/include/llvm/IR/Metadata.h
@@ -89,7 +89,7 @@ protected:
Metadata(unsigned ID, StorageType Storage)
: SubclassID(ID), Storage(Storage), SubclassData16(0), SubclassData32(0) {
}
- ~Metadata() {}
+ ~Metadata() = default;
/// \brief Default handling of a changed operand, which asserts.
///
@@ -164,7 +164,7 @@ class MetadataAsValue : public Value {
Metadata *MD;
MetadataAsValue(Type *Ty, Metadata *MD);
- ~MetadataAsValue();
+ ~MetadataAsValue() override;
/// \brief Drop use of metadata (during teardown).
void dropUse() { MD = nullptr; }
@@ -253,7 +253,7 @@ protected:
: Metadata(ID, Uniqued), ReplaceableMetadataImpl(V->getContext()), V(V) {
assert(V && "Expected valid value");
}
- ~ValueAsMetadata() {}
+ ~ValueAsMetadata() = default;
public:
static ValueAsMetadata *get(Value *V);
@@ -754,13 +754,18 @@ protected:
MDNode(LLVMContext &Context, unsigned ID, StorageType Storage,
ArrayRef<Metadata *> Ops1, ArrayRef<Metadata *> Ops2 = None);
- ~MDNode() {}
+ ~MDNode() = default;
void dropAllReferences();
MDOperand *mutable_begin() { return mutable_end() - NumOperands; }
MDOperand *mutable_end() { return reinterpret_cast<MDOperand *>(this); }
+ typedef iterator_range<MDOperand *> mutable_op_range;
+ mutable_op_range mutable_operands() {
+ return mutable_op_range(mutable_begin(), mutable_end());
+ }
+
public:
static inline MDTuple *get(LLVMContext &Context, ArrayRef<Metadata *> MDs);
static inline MDTuple *getIfExists(LLVMContext &Context,
@@ -1028,6 +1033,78 @@ void TempMDNodeDeleter::operator()(MDNode *Node) const {
MDNode::deleteTemporary(Node);
}
+/// \brief Typed iterator through MDNode operands.
+///
+/// An iterator that transforms an \a MDNode::iterator into an iterator over a
+/// particular Metadata subclass.
+template <class T>
+class TypedMDOperandIterator
+ : std::iterator<std::input_iterator_tag, T *, std::ptrdiff_t, void, T *> {
+ MDNode::op_iterator I = nullptr;
+
+public:
+ TypedMDOperandIterator() = default;
+ explicit TypedMDOperandIterator(MDNode::op_iterator I) : I(I) {}
+ T *operator*() const { return cast_or_null<T>(*I); }
+ TypedMDOperandIterator &operator++() {
+ ++I;
+ return *this;
+ }
+ TypedMDOperandIterator operator++(int) {
+ TypedMDOperandIterator Temp(*this);
+ ++I;
+ return Temp;
+ }
+ bool operator==(const TypedMDOperandIterator &X) const { return I == X.I; }
+ bool operator!=(const TypedMDOperandIterator &X) const { return I != X.I; }
+};
+
+/// \brief Typed, array-like tuple of metadata.
+///
+/// This is a wrapper for \a MDTuple that makes it act like an array holding a
+/// particular type of metadata.
+template <class T> class MDTupleTypedArrayWrapper {
+ const MDTuple *N = nullptr;
+
+public:
+ MDTupleTypedArrayWrapper() = default;
+ MDTupleTypedArrayWrapper(const MDTuple *N) : N(N) {}
+
+ template <class U>
+ MDTupleTypedArrayWrapper(
+ const MDTupleTypedArrayWrapper<U> &Other,
+ typename std::enable_if<std::is_convertible<U *, T *>::value>::type * =
+ nullptr)
+ : N(Other.get()) {}
+
+ template <class U>
+ explicit MDTupleTypedArrayWrapper(
+ const MDTupleTypedArrayWrapper<U> &Other,
+ typename std::enable_if<!std::is_convertible<U *, T *>::value>::type * =
+ nullptr)
+ : N(Other.get()) {}
+
+ explicit operator bool() const { return get(); }
+ explicit operator MDTuple *() const { return get(); }
+
+ MDTuple *get() const { return const_cast<MDTuple *>(N); }
+ MDTuple *operator->() const { return get(); }
+ MDTuple &operator*() const { return *get(); }
+
+ // FIXME: Fix callers and remove condition on N.
+ unsigned size() const { return N ? N->getNumOperands() : 0u; }
+ T *operator[](unsigned I) const { return cast_or_null<T>(N->getOperand(I)); }
+
+ // FIXME: Fix callers and remove condition on N.
+ typedef TypedMDOperandIterator<T> iterator;
+ iterator begin() const { return N ? iterator(N->op_begin()) : iterator(); }
+ iterator end() const { return N ? iterator(N->op_end()) : iterator(); }
+};
+
+#define HANDLE_METADATA(CLASS) \
+ typedef MDTupleTypedArrayWrapper<CLASS> CLASS##Array;
+#include "llvm/IR/Metadata.def"
+
//===----------------------------------------------------------------------===//
/// \brief A tuple of MDNodes.
///
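A minimal sketch, using only what the wrapper and iterator added above provide, of how a tuple can be walked as a typed array. "Tuple" is an illustrative const MDTuple*; each operand is cast_or_null'd to the requested subclass.

// Hedged sketch (C++): typed iteration over MDTuple operands.
void visitNodeOperands(const llvm::MDTuple *Tuple) {
  llvm::MDTupleTypedArrayWrapper<llvm::MDNode> Nodes(Tuple);
  for (llvm::MDNode *Op : Nodes)   // TypedMDOperandIterator<MDNode> underneath
    if (Op)                        // operator* may yield null for non-MDNode operands
      Op->dump();
  (void)Nodes.size();              // 0 when the wrapped tuple is null
}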
diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h
index ac60c8e..dbaf322 100644
--- a/include/llvm/IR/Module.h
+++ b/include/llvm/IR/Module.h
@@ -642,8 +642,11 @@ public:
/// @{
/// Print the module to an output stream with an optional
- /// AssemblyAnnotationWriter.
- void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const;
+ /// AssemblyAnnotationWriter. If \c ShouldPreserveUseListOrder, then include
+ /// uselistorder directives so that use-lists can be recreated when reading
+ /// the assembly.
+ void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW,
+ bool ShouldPreserveUseListOrder = false) const;
/// Dump the module to stderr (for debugging).
void dump() const;
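A one-line sketch of the extended print signature above; the annotation writer may be null, and the new flag asks for uselistorder directives in the textual IR.

// Hedged sketch (C++): print a module so its use-lists can be reconstructed on re-parse.
void dumpWithUseListOrder(const llvm::Module &M) {
  M.print(llvm::outs(), /*AAW=*/nullptr, /*ShouldPreserveUseListOrder=*/true);
}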
diff --git a/include/llvm/IR/NoFolder.h b/include/llvm/IR/NoFolder.h
index ab7bed6..61f4817 100644
--- a/include/llvm/IR/NoFolder.h
+++ b/include/llvm/IR/NoFolder.h
@@ -177,34 +177,35 @@ public:
// Memory Instructions
//===--------------------------------------------------------------------===//
- Constant *CreateGetElementPtr(Constant *C,
+ Constant *CreateGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Constant *> IdxList) const {
- return ConstantExpr::getGetElementPtr(C, IdxList);
+ return ConstantExpr::getGetElementPtr(Ty, C, IdxList);
}
- Constant *CreateGetElementPtr(Constant *C, Constant *Idx) const {
+ Constant *CreateGetElementPtr(Type *Ty, Constant *C, Constant *Idx) const {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
- return ConstantExpr::getGetElementPtr(C, Idx);
+ return ConstantExpr::getGetElementPtr(Ty, C, Idx);
}
- Instruction *CreateGetElementPtr(Constant *C,
+ Instruction *CreateGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> IdxList) const {
- return GetElementPtrInst::Create(nullptr, C, IdxList);
+ return GetElementPtrInst::Create(Ty, C, IdxList);
}
- Constant *CreateInBoundsGetElementPtr(Constant *C,
+ Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Constant *> IdxList) const {
- return ConstantExpr::getInBoundsGetElementPtr(C, IdxList);
+ return ConstantExpr::getInBoundsGetElementPtr(Ty, C, IdxList);
}
- Constant *CreateInBoundsGetElementPtr(Constant *C, Constant *Idx) const {
+ Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
+ Constant *Idx) const {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
- return ConstantExpr::getInBoundsGetElementPtr(C, Idx);
+ return ConstantExpr::getInBoundsGetElementPtr(Ty, C, Idx);
}
- Instruction *CreateInBoundsGetElementPtr(Constant *C,
+ Instruction *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> IdxList) const {
- return GetElementPtrInst::CreateInBounds(nullptr, C, IdxList);
+ return GetElementPtrInst::CreateInBounds(Ty, C, IdxList);
}
//===--------------------------------------------------------------------===//
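A small sketch of what the updated folder interface looks like at a call site: the source element type is now passed explicitly rather than inferred from the pointer operand. The folder, type, and operands here are illustrative.

// Hedged sketch (C++): explicit pointee type with the new CreateGetElementPtr form.
llvm::Constant *makeGEP(llvm::NoFolder &Folder, llvm::Type *ElemTy,
                        llvm::Constant *BasePtr, llvm::Constant *Idx) {
  return Folder.CreateGetElementPtr(ElemTy, BasePtr, Idx); // was CreateGetElementPtr(BasePtr, Idx)
}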
diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h
index c87f89c..8c3afdd 100644
--- a/include/llvm/IR/Operator.h
+++ b/include/llvm/IR/Operator.h
@@ -42,7 +42,7 @@ protected:
// NOTE: Cannot use = delete because it's not legal to delete
// an overridden method that's not deleted in the base class. Cannot leave
// this unimplemented because that leads to an ODR-violation.
- ~Operator();
+ ~Operator() override;
public:
/// Return the opcode for this Instruction or ConstantExpr.
@@ -181,17 +181,17 @@ public:
{ }
/// Whether any flag is set
- bool any() { return Flags != 0; }
+ bool any() const { return Flags != 0; }
/// Set all the flags to false
void clear() { Flags = 0; }
/// Flag queries
- bool noNaNs() { return 0 != (Flags & NoNaNs); }
- bool noInfs() { return 0 != (Flags & NoInfs); }
- bool noSignedZeros() { return 0 != (Flags & NoSignedZeros); }
- bool allowReciprocal() { return 0 != (Flags & AllowReciprocal); }
- bool unsafeAlgebra() { return 0 != (Flags & UnsafeAlgebra); }
+ bool noNaNs() const { return 0 != (Flags & NoNaNs); }
+ bool noInfs() const { return 0 != (Flags & NoInfs); }
+ bool noSignedZeros() const { return 0 != (Flags & NoSignedZeros); }
+ bool allowReciprocal() const { return 0 != (Flags & AllowReciprocal); }
+ bool unsafeAlgebra() const { return 0 != (Flags & UnsafeAlgebra); }
/// Flag setters
void setNoNaNs() { Flags |= NoNaNs; }
diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h
index f94e105..41154e6 100644
--- a/include/llvm/IR/PatternMatch.h
+++ b/include/llvm/IR/PatternMatch.h
@@ -295,6 +295,9 @@ template <typename Class> struct bind_ty {
/// \brief Match a value, capturing it if we match.
inline bind_ty<Value> m_Value(Value *&V) { return V; }
+/// \brief Match an instruction, capturing it if we match.
+inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
+
/// \brief Match a binary operator, capturing it if we match.
inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
@@ -1103,6 +1106,52 @@ m_UnordFMax(const LHS &L, const RHS &R) {
return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
}
+//===----------------------------------------------------------------------===//
+// Matchers for overflow check patterns: e.g. (a + b) u< a
+//
+
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+struct UAddWithOverflow_match {
+ LHS_t L;
+ RHS_t R;
+ Sum_t S;
+
+ UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
+ : L(L), R(R), S(S) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ Value *ICmpLHS, *ICmpRHS;
+ ICmpInst::Predicate Pred;
+ if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
+ return false;
+
+ Value *AddLHS, *AddRHS;
+ auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
+
+ // (a + b) u< a, (a + b) u< b
+ if (Pred == ICmpInst::ICMP_ULT)
+ if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
+ return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
+
+ // a >u (a + b), b >u (a + b)
+ if (Pred == ICmpInst::ICMP_UGT)
+ if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
+ return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
+
+ return false;
+ }
+};
+
+/// \brief Match an icmp instruction checking for unsigned overflow on addition.
+///
+/// S is matched to the addition whose result is being checked for overflow, and
+/// L and R are matched to the LHS and RHS of S.
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
+m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
+ return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
+}
+
/// \brief Match an 'unordered' floating point minimum function.
/// Floating point has one special value 'NaN'. Therefore, there is no total
/// order. However, if we can ignore the 'NaN' value (for example, because of a
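A minimal sketch of the new overflow-check matcher in use, assuming the usual PatternMatch idiom; "V" is an illustrative value being inspected.

// Hedged sketch (C++): recognize "(a + b) u< a" / "a >u (a + b)" overflow checks.
using namespace llvm;
using namespace llvm::PatternMatch;
bool isUAddOverflowCheck(Value *V, Value *&A, Value *&B, Value *&Sum) {
  // On success A and B bind to the addends and Sum to the add being tested.
  return match(V, m_UAddWithOverflow(m_Value(A), m_Value(B), m_Value(Sum)));
}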
diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h
index c2073c7..09b6388 100644
--- a/include/llvm/IR/Type.h
+++ b/include/llvm/IR/Type.h
@@ -91,8 +91,8 @@ protected:
NumContainedTys(0), ContainedTys(nullptr) {
setTypeID(tid);
}
- ~Type() {}
-
+ ~Type() = default;
+
void setTypeID(TypeID ID) {
IDAndSubclassData = (ID & 0xFF) | (IDAndSubclassData & 0xFFFFFF00);
assert(getTypeID() == ID && "TypeID data too large for field");
diff --git a/include/llvm/IR/UseListOrder.h b/include/llvm/IR/UseListOrder.h
index 7d8205d..b7c2418 100644
--- a/include/llvm/IR/UseListOrder.h
+++ b/include/llvm/IR/UseListOrder.h
@@ -51,12 +51,6 @@ private:
typedef std::vector<UseListOrder> UseListOrderStack;
-/// \brief Whether to preserve use-list ordering.
-bool shouldPreserveBitcodeUseListOrder();
-bool shouldPreserveAssemblyUseListOrder();
-void setPreserveBitcodeUseListOrder(bool ShouldPreserve);
-void setPreserveAssemblyUseListOrder(bool ShouldPreserve);
-
} // end namespace llvm
#endif
diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h
index d39378d..4559005 100644
--- a/include/llvm/IR/User.h
+++ b/include/llvm/IR/User.h
@@ -60,9 +60,7 @@ protected:
NumOperands = 0;
}
public:
- ~User() {
- Use::zap(OperandList, OperandList + NumOperands);
- }
+ ~User() override { Use::zap(OperandList, OperandList + NumOperands); }
/// \brief Free memory allocated for User and Use objects.
void operator delete(void *Usr);
/// \brief Placement delete - required by std, but never called.
diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h
index 08d6d17..4d00b63 100644
--- a/include/llvm/IR/ValueMap.h
+++ b/include/llvm/IR/ValueMap.h
@@ -99,8 +99,6 @@ public:
explicit ValueMap(const ExtraData &Data, unsigned NumInitBuckets = 64)
: Map(NumInitBuckets), Data(Data) {}
- ~ValueMap() {}
-
bool hasMD() const { return MDMap; }
MDMapT &MD() {
if (!MDMap)
@@ -149,9 +147,14 @@ public:
// If the key is already in the map, it returns false and doesn't update the
// value.
std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &KV) {
- std::pair<typename MapT::iterator, bool> map_result=
- Map.insert(std::make_pair(Wrap(KV.first), KV.second));
- return std::make_pair(iterator(map_result.first), map_result.second);
+ auto MapResult = Map.insert(std::make_pair(Wrap(KV.first), KV.second));
+ return std::make_pair(iterator(MapResult.first), MapResult.second);
+ }
+
+ std::pair<iterator, bool> insert(std::pair<KeyT, ValueT> &&KV) {
+ auto MapResult =
+ Map.insert(std::make_pair(Wrap(KV.first), std::move(KV.second)));
+ return std::make_pair(iterator(MapResult.first), MapResult.second);
}
/// insert - Range insertion of pairs.
@@ -258,9 +261,9 @@ public:
// I could == Copy.Map->Map.end() if the onRAUW callback already
// removed the old mapping.
if (I != Copy.Map->Map.end()) {
- ValueT Target(I->second);
+ ValueT Target(std::move(I->second));
Copy.Map->Map.erase(I); // Definitely destroys *this.
- Copy.Map->insert(std::make_pair(typed_new_key, Target));
+ Copy.Map->insert(std::make_pair(typed_new_key, std::move(Target)));
}
}
}
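A short sketch of what the new rvalue insert overload above buys: a heavy mapped value can be moved into the map instead of copied. The key and value types are illustrative.

// Hedged sketch (C++): move a mapped value into a ValueMap.
void addEntry(llvm::ValueMap<llvm::Value *, std::string> &VM, llvm::Value *Key) {
  std::pair<llvm::Value *, std::string> KV(Key, std::string(4096, 'x'));
  VM.insert(std::move(KV));  // selects the new insert(std::pair<KeyT, ValueT> &&)
}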
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 1d428b0..490d814 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -110,6 +110,7 @@ void initializeDeadInstEliminationPass(PassRegistry&);
void initializeDeadMachineInstructionElimPass(PassRegistry&);
void initializeDelinearizationPass(PassRegistry &);
void initializeDependenceAnalysisPass(PassRegistry&);
+void initializeDivergenceAnalysisPass(PassRegistry&);
void initializeDomOnlyPrinterPass(PassRegistry&);
void initializeDomOnlyViewerPass(PassRegistry&);
void initializeDomPrinterPass(PassRegistry&);
@@ -205,6 +206,7 @@ void initializeMergedLoadStoreMotionPass(PassRegistry &);
void initializeMetaRenamerPass(PassRegistry&);
void initializeMergeFunctionsPass(PassRegistry&);
void initializeModuleDebugInfoPrinterPass(PassRegistry&);
+void initializeNaryReassociatePass(PassRegistry&);
void initializeNoAAPass(PassRegistry&);
void initializeObjCARCAliasAnalysisPass(PassRegistry&);
void initializeObjCARCAPElimPass(PassRegistry&);
@@ -294,6 +296,7 @@ void initializeWinEHPreparePass(PassRegistry&);
void initializePlaceBackedgeSafepointsImplPass(PassRegistry&);
void initializePlaceSafepointsPass(PassRegistry&);
void initializeDwarfEHPreparePass(PassRegistry&);
+void initializeFloat2IntPass(PassRegistry&);
}
#endif
diff --git a/include/llvm/LTO/LTOCodeGenerator.h b/include/llvm/LTO/LTOCodeGenerator.h
index 820d6d5..a4aace4 100644
--- a/include/llvm/LTO/LTOCodeGenerator.h
+++ b/include/llvm/LTO/LTOCodeGenerator.h
@@ -53,6 +53,7 @@ namespace llvm {
class TargetLibraryInfo;
class TargetMachine;
class raw_ostream;
+ class raw_pwrite_stream;
//===----------------------------------------------------------------------===//
/// C++ class which implements the opaque lto_code_gen_t type.
@@ -137,7 +138,7 @@ struct LTOCodeGenerator {
private:
void initializeLTOPasses();
- bool compileOptimized(raw_ostream &out, std::string &errMsg);
+ bool compileOptimized(raw_pwrite_stream &out, std::string &errMsg);
bool compileOptimizedToFile(const char **name, std::string &errMsg);
void applyScopeRestrictions();
void applyRestriction(GlobalValue &GV, ArrayRef<StringRef> Libcalls,
diff --git a/include/llvm/LineEditor/LineEditor.h b/include/llvm/LineEditor/LineEditor.h
index 1a9a691..bb106f8 100644
--- a/include/llvm/LineEditor/LineEditor.h
+++ b/include/llvm/LineEditor/LineEditor.h
@@ -119,7 +119,7 @@ private:
};
struct ListCompleterConcept : CompleterConcept {
- ~ListCompleterConcept();
+ ~ListCompleterConcept() override;
CompletionAction complete(StringRef Buffer, size_t Pos) const override;
static std::string getCommonPrefix(const std::vector<Completion> &Comps);
virtual std::vector<Completion> getCompletions(StringRef Buffer,
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 391267c..bee39b1 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LINKALLPASSES_H
#define LLVM_LINKALLPASSES_H
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/CallPrinter.h"
#include "llvm/Analysis/DomPrinter.h"
@@ -35,6 +36,7 @@
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Support/Valgrind.h"
#include <cstdlib>
namespace {
@@ -74,6 +76,7 @@ namespace {
(void) llvm::createDeadInstEliminationPass();
(void) llvm::createDeadStoreEliminationPass();
(void) llvm::createDependenceAnalysisPass();
+ (void) llvm::createDivergenceAnalysisPass();
(void) llvm::createDomOnlyPrinterPass();
(void) llvm::createDomPrinterPass();
(void) llvm::createDomOnlyViewerPass();
@@ -106,6 +109,7 @@ namespace {
(void) llvm::createLowerExpectIntrinsicPass();
(void) llvm::createLowerInvokePass();
(void) llvm::createLowerSwitchPass();
+ (void) llvm::createNaryReassociatePass();
(void) llvm::createNoAAPass();
(void) llvm::createObjCARCAliasAnalysisPass();
(void) llvm::createObjCARCAPElimPass();
@@ -169,6 +173,7 @@ namespace {
(void) llvm::createRewriteSymbolsPass();
(void) llvm::createStraightLineStrengthReducePass();
(void) llvm::createMemDerefPrinter();
+ (void) llvm::createFloat2IntPass();
(void)new llvm::IntervalPartition();
(void)new llvm::ScalarEvolution();
@@ -177,6 +182,8 @@ namespace {
((llvm::RegionPass*)nullptr)->runOnRegion((llvm::Region*)nullptr, RGM);
llvm::AliasSetTracker X(*(llvm::AliasAnalysis*)nullptr);
X.add(nullptr, 0, llvm::AAMDNodes()); // for -print-alias-sets
+ (void) llvm::AreStatisticsEnabled();
+ (void) llvm::sys::RunningOnValgrind();
}
} ForcePassLinking; // Force link by creating a global definition.
}
diff --git a/include/llvm/MC/ConstantPools.h b/include/llvm/MC/ConstantPools.h
index 1fc0332..ee7022b 100644
--- a/include/llvm/MC/ConstantPools.h
+++ b/include/llvm/MC/ConstantPools.h
@@ -77,9 +77,6 @@ class AssemblerConstantPools {
ConstantPoolMapTy ConstantPools;
public:
- AssemblerConstantPools() {}
- ~AssemblerConstantPools() {}
-
void emitAll(MCStreamer &Streamer);
void emitForCurrentSection(MCStreamer &Streamer);
const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr,
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index 56da95d..ff77dc9 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -30,7 +30,7 @@ class MCSection;
class MCValue;
class raw_ostream;
-/// MCAsmBackend - Generic interface to target specific assembler backends.
+/// Generic interface to target specific assembler backends.
class MCAsmBackend {
MCAsmBackend(const MCAsmBackend &) = delete;
void operator=(const MCAsmBackend &) = delete;
@@ -46,42 +46,42 @@ public:
/// lifetime management
virtual void reset() {}
- /// createObjectWriter - Create a new MCObjectWriter instance for use by the
- /// assembler backend to emit the final object file.
- virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const = 0;
+ /// Create a new MCObjectWriter instance for use by the assembler backend to
+ /// emit the final object file.
+ virtual MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const = 0;
- /// createELFObjectTargetWriter - Create a new ELFObjectTargetWriter to enable
- /// non-standard ELFObjectWriters.
+ /// Create a new ELFObjectTargetWriter to enable non-standard
+ /// ELFObjectWriters.
virtual MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
llvm_unreachable("createELFObjectTargetWriter is not supported by asm "
"backend");
}
- /// hasDataInCodeSupport - Check whether this target implements data-in-code
- /// markers. If not, data region directives will be ignored.
+ /// Check whether this target implements data-in-code markers. If not, data
+ /// region directives will be ignored.
bool hasDataInCodeSupport() const { return HasDataInCodeSupport; }
/// @name Target Fixup Interfaces
/// @{
- /// getNumFixupKinds - Get the number of target specific fixup kinds.
+ /// Get the number of target specific fixup kinds.
virtual unsigned getNumFixupKinds() const = 0;
- /// getFixupKindInfo - Get information on a fixup kind.
+ /// Get information on a fixup kind.
virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const;
- /// processFixupValue - Target hook to adjust the literal value of a fixup
- /// if necessary. IsResolved signals whether the caller believes a relocation
- /// is needed; the target can modify the value. The default does nothing.
+ /// Target hook to adjust the literal value of a fixup if necessary.
+ /// IsResolved signals whether the caller believes a relocation is needed; the
+ /// target can modify the value. The default does nothing.
virtual void processFixupValue(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
const MCValue &Target, uint64_t &Value,
bool &IsResolved) {}
- /// applyFixup - Apply the \p Value for given \p Fixup into the provided
- /// data fragment, at the offset specified by the fixup and following the
- /// fixup kind as appropriate.
+ /// Apply the \p Value for given \p Fixup into the provided data fragment, at
+ /// the offset specified by the fixup and following the fixup kind as
+ /// appropriate.
virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const = 0;
@@ -90,20 +90,18 @@ public:
/// @name Target Relaxation Interfaces
/// @{
- /// mayNeedRelaxation - Check whether the given instruction may need
- /// relaxation.
+ /// Check whether the given instruction may need relaxation.
///
/// \param Inst - The instruction to test.
virtual bool mayNeedRelaxation(const MCInst &Inst) const = 0;
- /// fixupNeedsRelaxation - Target specific predicate for whether a given
- /// fixup requires the associated instruction to be relaxed.
+ /// Target specific predicate for whether a given fixup requires the
+ /// associated instruction to be relaxed.
virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const = 0;
- /// RelaxInstruction - Relax the instruction in the given fragment to the next
- /// wider instruction.
+ /// Relax the instruction in the given fragment to the next wider instruction.
///
/// \param Inst The instruction to relax, which may be the same as the
/// output.
@@ -112,22 +110,19 @@ public:
/// @}
- /// getMinimumNopSize - Returns the minimum size of a nop in bytes on this
- /// target. The assembler will use this to emit excess padding in situations
- /// where the padding required for simple alignment would be less than the
- /// minimum nop size.
+ /// Returns the minimum size of a nop in bytes on this target. The assembler
+ /// will use this to emit excess padding in situations where the padding
+ /// required for simple alignment would be less than the minimum nop size.
///
virtual unsigned getMinimumNopSize() const { return 1; }
- /// writeNopData - Write an (optimal) nop sequence of Count bytes to the given
- /// output. If the target cannot generate such a sequence, it should return an
- /// error.
+ /// Write an (optimal) nop sequence of Count bytes to the given output. If the
+ /// target cannot generate such a sequence, it should return an error.
///
/// \return - True on success.
virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const = 0;
- /// handleAssemblerFlag - Handle any target-specific assembler flags.
- /// By default, do nothing.
+ /// Handle any target-specific assembler flags. By default, do nothing.
virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {}
/// \brief Generate the compact unwind encoding for the CFI instructions.
diff --git a/include/llvm/MC/MCAsmInfoELF.h b/include/llvm/MC/MCAsmInfoELF.h
index 7bd2460..afd4933 100644
--- a/include/llvm/MC/MCAsmInfoELF.h
+++ b/include/llvm/MC/MCAsmInfoELF.h
@@ -15,8 +15,7 @@
namespace llvm {
class MCAsmInfoELF : public MCAsmInfo {
virtual void anchor();
- const MCSection *
- getNonexecutableStackSection(MCContext &Ctx) const override final;
+ const MCSection *getNonexecutableStackSection(MCContext &Ctx) const final;
protected:
MCAsmInfoELF();
diff --git a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h
index 4d1590a..8b012be 100644
--- a/include/llvm/MC/MCAsmLayout.h
+++ b/include/llvm/MC/MCAsmLayout.h
@@ -50,11 +50,6 @@ private:
/// \brief Is the layout for this fragment valid?
bool isFragmentValid(const MCFragment *F) const;
- /// \brief Compute the amount of padding required before this fragment to
- /// obey bundling restrictions.
- uint64_t computeBundlePadding(const MCFragment *F,
- uint64_t FOffset, uint64_t FSize);
-
public:
MCAsmLayout(MCAssembler &Assembler);
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 9a85293..b8e9227 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -143,7 +143,7 @@ public:
: MCFragment(FType, SD), BundlePadding(0)
{
}
- virtual ~MCEncodedFragment();
+ ~MCEncodedFragment() override;
virtual SmallVectorImpl<char> &getContents() = 0;
virtual const SmallVectorImpl<char> &getContents() const = 0;
@@ -182,7 +182,7 @@ public:
{
}
- virtual ~MCEncodedFragmentWithFixups();
+ ~MCEncodedFragmentWithFixups() override;
typedef SmallVectorImpl<MCFixup>::const_iterator const_fixup_iterator;
typedef SmallVectorImpl<MCFixup>::iterator fixup_iterator;
@@ -1245,11 +1245,23 @@ public:
FileNames.push_back(FileName);
}
+ /// \brief Write the necessary bundle padding to the given object writer.
+ /// Expects a fragment \p F containing instructions and its size \p FSize.
+ void writeFragmentPadding(const MCFragment &F, uint64_t FSize,
+ MCObjectWriter *OW) const;
+
/// @}
void dump();
};
+/// \brief Compute the amount of padding required before the fragment \p F to
+/// obey bundling restrictions, where \p FOffset is the fragment's offset in
+/// its section and \p FSize is the fragment's size.
+uint64_t computeBundlePadding(const MCAssembler &Assembler,
+ const MCFragment *F,
+ uint64_t FOffset, uint64_t FSize);
+
} // end namespace llvm
#endif
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 064f471..bf473cc 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -162,12 +162,44 @@ namespace llvm {
/// The Compile Unit ID that we are currently processing.
unsigned DwarfCompileUnitID;
- typedef std::pair<std::string, std::string> SectionGroupPair;
- typedef std::tuple<std::string, std::string, int> SectionGroupTriple;
+ struct ELFSectionKey {
+ std::string SectionName;
+ StringRef GroupName;
+ unsigned UniqueID;
+ ELFSectionKey(StringRef SectionName, StringRef GroupName,
+ unsigned UniqueID)
+ : SectionName(SectionName), GroupName(GroupName), UniqueID(UniqueID) {
+ }
+ bool operator<(const ELFSectionKey &Other) const {
+ if (SectionName != Other.SectionName)
+ return SectionName < Other.SectionName;
+ if (GroupName != Other.GroupName)
+ return GroupName < Other.GroupName;
+ return UniqueID < Other.UniqueID;
+ }
+ };
+
+ struct COFFSectionKey {
+ std::string SectionName;
+ StringRef GroupName;
+ int SelectionKey;
+ COFFSectionKey(StringRef SectionName, StringRef GroupName,
+ int SelectionKey)
+ : SectionName(SectionName), GroupName(GroupName),
+ SelectionKey(SelectionKey) {}
+ bool operator<(const COFFSectionKey &Other) const {
+ if (SectionName != Other.SectionName)
+ return SectionName < Other.SectionName;
+ if (GroupName != Other.GroupName)
+ return GroupName < Other.GroupName;
+ return SelectionKey < Other.SelectionKey;
+ }
+ };
StringMap<const MCSectionMachO*> MachOUniquingMap;
- std::map<SectionGroupPair, const MCSectionELF *> ELFUniquingMap;
- std::map<SectionGroupTriple, const MCSectionCOFF *> COFFUniquingMap;
+ std::map<ELFSectionKey, const MCSectionELF *> ELFUniquingMap;
+ std::map<COFFSectionKey, const MCSectionCOFF *> COFFUniquingMap;
+ StringMap<bool> ELFRelSecNames;
/// Do automatic reset in destructor
bool AutoReset;
@@ -231,8 +263,14 @@ namespace llvm {
MCSymbol *getOrCreateSectionSymbol(const MCSectionELF &Section);
+ /// Gets a symbol that will be defined to the final stack offset of a local
+ /// variable after codegen.
+ ///
+ /// @param Idx - The index of a local variable passed to @llvm.frameescape.
MCSymbol *getOrCreateFrameAllocSymbol(StringRef FuncName, unsigned Idx);
+ MCSymbol *getOrCreateParentFrameOffsetSymbol(StringRef FuncName);
+
/// Get the symbol for \p Name, or null.
MCSymbol *LookupSymbol(const Twine &Name) const;
@@ -265,18 +303,52 @@ namespace llvm {
}
const MCSectionELF *getELFSection(StringRef Section, unsigned Type,
+ unsigned Flags) {
+ return getELFSection(Section, Type, Flags, nullptr);
+ }
+
+ const MCSectionELF *getELFSection(StringRef Section, unsigned Type,
unsigned Flags,
- const char *BeginSymName = nullptr);
+ const char *BeginSymName) {
+ return getELFSection(Section, Type, Flags, 0, "", BeginSymName);
+ }
+
+ const MCSectionELF *getELFSection(StringRef Section, unsigned Type,
+ unsigned Flags, unsigned EntrySize,
+ StringRef Group) {
+ return getELFSection(Section, Type, Flags, EntrySize, Group, nullptr);
+ }
const MCSectionELF *getELFSection(StringRef Section, unsigned Type,
unsigned Flags, unsigned EntrySize,
StringRef Group,
- const char *BeginSymName = nullptr);
+ const char *BeginSymName) {
+ return getELFSection(Section, Type, Flags, EntrySize, Group, ~0,
+ BeginSymName);
+ }
+
+ const MCSectionELF *getELFSection(StringRef Section, unsigned Type,
+ unsigned Flags, unsigned EntrySize,
+ StringRef Group, unsigned UniqueID) {
+ return getELFSection(Section, Type, Flags, EntrySize, Group, UniqueID,
+ nullptr);
+ }
+
+ const MCSectionELF *getELFSection(StringRef Section, unsigned Type,
+ unsigned Flags, unsigned EntrySize,
+ StringRef Group, unsigned UniqueID,
+ const char *BeginSymName);
const MCSectionELF *getELFSection(StringRef Section, unsigned Type,
unsigned Flags, unsigned EntrySize,
- StringRef Group, bool Unique,
- const char *BeginSymName = nullptr);
+ const MCSymbol *Group, unsigned UniqueID,
+ const char *BeginSymName,
+ const MCSectionELF *Associated);
+
+ const MCSectionELF *createELFRelSection(StringRef Name, unsigned Type,
+ unsigned Flags, unsigned EntrySize,
+ const MCSymbol *Group,
+ const MCSectionELF *Associated);
void renameELFSection(const MCSectionELF *Section, StringRef Name);
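A hedged sketch of the UniqueID-based overloads above: two ELF sections can share a name yet remain distinct sections. "Ctx" is an illustrative MCContext, and the flags come from llvm/Support/ELF.h.

// Hedged sketch (C++): same section name, different unique IDs.
void makeSplitTextSections(llvm::MCContext &Ctx) {
  using namespace llvm;
  const MCSectionELF *A =
      Ctx.getELFSection(".text.split", ELF::SHT_PROGBITS,
                        ELF::SHF_ALLOC | ELF::SHF_EXECINSTR, 0, "", /*UniqueID=*/0);
  const MCSectionELF *B =
      Ctx.getELFSection(".text.split", ELF::SHT_PROGBITS,
                        ELF::SHF_ALLOC | ELF::SHF_EXECINSTR, 0, "", /*UniqueID=*/1);
  (void)A; (void)B; // A != B even though the names match.
}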
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
index 7493507..e9fef76 100644
--- a/include/llvm/MC/MCELFObjectWriter.h
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -24,6 +24,18 @@ class MCSectionData;
class MCSymbol;
class MCSymbolData;
class MCValue;
+class raw_pwrite_stream;
+
+struct ELFRelocationEntry {
+ uint64_t Offset; // Where is the relocation.
+ const MCSymbol *Symbol; // The symbol to relocate with.
+ unsigned Type; // The type of the relocation.
+ uint64_t Addend; // The addend to use.
+
+ ELFRelocationEntry(uint64_t Offset, const MCSymbol *Symbol, unsigned Type,
+ uint64_t Addend)
+ : Offset(Offset), Symbol(Symbol), Type(Type), Addend(Addend) {}
+};
class MCELFObjectTargetWriter {
const uint8_t OSABI;
@@ -61,6 +73,9 @@ public:
virtual bool needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const;
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs);
+
/// @name Accessors
/// @{
uint8_t getOSABI() const { return OSABI; }
@@ -116,7 +131,8 @@ public:
/// \param OS - The stream to write to.
/// \returns The constructed object writer.
MCObjectWriter *createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &OS, bool IsLittleEndian);
+ raw_pwrite_stream &OS,
+ bool IsLittleEndian);
} // End llvm namespace
#endif
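For illustration, a sketch of the kind of ordering a target's override of the new sortRelocs hook could apply to the ELFRelocationEntry vector introduced above; the ordering shown (ascending offset) is purely illustrative.

// Hedged sketch (C++): helper a sortRelocs override might call.
#include <algorithm>
void sortRelocsByOffset(std::vector<llvm::ELFRelocationEntry> &Relocs) {
  std::stable_sort(Relocs.begin(), Relocs.end(),
                   [](const llvm::ELFRelocationEntry &A,
                      const llvm::ELFRelocationEntry &B) {
                     return A.Offset < B.Offset;
                   });
}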
diff --git a/include/llvm/MC/MCELFStreamer.h b/include/llvm/MC/MCELFStreamer.h
index ab6c5e3..ace4ee3 100644
--- a/include/llvm/MC/MCELFStreamer.h
+++ b/include/llvm/MC/MCELFStreamer.h
@@ -29,23 +29,18 @@ class raw_ostream;
class MCELFStreamer : public MCObjectStreamer {
public:
- MCELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ MCELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter)
- : MCObjectStreamer(Context, TAB, OS, Emitter),
- SeenIdent(false) {}
+ : MCObjectStreamer(Context, TAB, OS, Emitter), SeenIdent(false) {}
- MCELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, MCAssembler *Assembler)
- : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler),
- SeenIdent(false) {}
-
- virtual ~MCELFStreamer();
+ ~MCELFStreamer() override;
/// state management
void reset() override {
+ SeenIdent = false;
LocalCommons.clear();
BindingExplicitlySet.clear();
- SeenIdent = false;
+ BundleGroups.clear();
MCObjectStreamer::reset();
}
@@ -100,6 +95,9 @@ private:
void fixSymbolsInTLSFixups(const MCExpr *expr);
+ /// \brief Merge the content of the fragment \p EF into the fragment \p DF.
+ void mergeFragment(MCDataFragment *, MCEncodedFragmentWithFixups *);
+
bool SeenIdent;
struct LocalCommon {
@@ -111,11 +109,16 @@ private:
std::vector<LocalCommon> LocalCommons;
SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
+
+ /// BundleGroups - The stack of fragments holding the bundle-locked
+ /// instructions.
+ llvm::SmallVector<MCDataFragment *, 4> BundleGroups;
};
MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool IsThumb);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IsThumb);
} // end namespace llvm
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
index dce3a06..4181d00 100644
--- a/include/llvm/MC/MCInstPrinter.h
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -19,6 +19,7 @@ class raw_ostream;
class MCAsmInfo;
class MCInstrInfo;
class MCRegisterInfo;
+class MCSubtargetInfo;
class StringRef;
namespace HexStyle {
@@ -40,9 +41,6 @@ protected:
const MCInstrInfo &MII;
const MCRegisterInfo &MRI;
- /// The current set of available features.
- uint64_t AvailableFeatures;
-
/// True if we are printing marked up assembly.
bool UseMarkup;
@@ -58,7 +56,7 @@ public:
MCInstPrinter(const MCAsmInfo &mai, const MCInstrInfo &mii,
const MCRegisterInfo &mri)
: CommentStream(nullptr), MAI(mai), MII(mii), MRI(mri),
- AvailableFeatures(0), UseMarkup(0), PrintImmHex(0),
+ UseMarkup(0), PrintImmHex(0),
PrintHexStyle(HexStyle::C) {}
virtual ~MCInstPrinter();
@@ -69,7 +67,7 @@ public:
/// printInst - Print the specified MCInst to the specified raw_ostream.
///
virtual void printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) = 0;
+ StringRef Annot, const MCSubtargetInfo &STI) = 0;
/// getOpcodeName - Return the name of the specified opcode enum (e.g.
/// "MOV32ri") or empty if we can't resolve it.
@@ -78,9 +76,6 @@ public:
/// printRegName - Print the assembler register name.
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- uint64_t getAvailableFeatures() const { return AvailableFeatures; }
- void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
-
bool getUseMarkup() const { return UseMarkup; }
void setUseMarkup(bool Value) { UseMarkup = Value; }
diff --git a/include/llvm/MC/MCLinkerOptimizationHint.h b/include/llvm/MC/MCLinkerOptimizationHint.h
index 890d638..c96d578 100644
--- a/include/llvm/MC/MCLinkerOptimizationHint.h
+++ b/include/llvm/MC/MCLinkerOptimizationHint.h
@@ -141,7 +141,7 @@ public:
public:
raw_counting_ostream() : Count(0) {}
- ~raw_counting_ostream() { flush(); }
+ ~raw_counting_ostream() override { flush(); }
};
raw_counting_ostream OutStream;
diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h
index 514700b..bfe5d36 100644
--- a/include/llvm/MC/MCMachObjectWriter.h
+++ b/include/llvm/MC/MCMachObjectWriter.h
@@ -119,7 +119,7 @@ class MachObjectWriter : public MCObjectWriter {
MachSymbolData *findSymbolData(const MCSymbol &Sym);
public:
- MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &OS,
+ MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_pwrite_stream &OS,
bool IsLittleEndian)
: MCObjectWriter(OS, IsLittleEndian), TargetObjectWriter(MOTW) {}
@@ -257,13 +257,12 @@ public:
void computeSectionAddresses(const MCAssembler &Asm,
const MCAsmLayout &Layout);
- void markAbsoluteVariableSymbols(MCAssembler &Asm,
- const MCAsmLayout &Layout);
void ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override;
bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbolData &DataA,
+ const MCSymbolData *DataB,
const MCFragment &FB,
bool InSet,
bool IsPCRel) const override;
@@ -280,7 +279,8 @@ public:
/// \param OS - The stream to write to.
/// \returns The constructed object writer.
MCObjectWriter *createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
- raw_ostream &OS, bool IsLittleEndian);
+ raw_pwrite_stream &OS,
+ bool IsLittleEndian);
} // End llvm namespace
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index 2420072..6666a50 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -24,6 +24,7 @@ class MCFragment;
class MCDataFragment;
class MCAsmBackend;
class raw_ostream;
+class raw_pwrite_stream;
/// \brief Streaming object file generation interface.
///
@@ -44,16 +45,10 @@ class MCObjectStreamer : public MCStreamer {
void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
- // If any labels have been emitted but not assigned fragments, ensure that
- // they get assigned, either to F if possible or to a new data fragment.
- void flushPendingLabels(MCFragment *F);
-
protected:
- MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter);
- MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, MCAssembler *Assembler);
- ~MCObjectStreamer();
+ ~MCObjectStreamer() override;
public:
/// state management
@@ -87,6 +82,12 @@ protected:
bool changeSectionImpl(const MCSection *Section, const MCExpr *Subsection);
+ /// If any labels have been emitted but not assigned fragments, ensure that
+ /// they get assigned, either to F if possible or to a new data fragment.
+ /// Optionally, it is also possible to provide an offset \p FOffset, which
+ /// will be used as a symbol offset within the fragment.
+ void flushPendingLabels(MCFragment *F, uint64_t FOffset = 0);
+
public:
void visitUsedSymbol(const MCSymbol &Sym) override;
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index fcfa968..f8e2821 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -41,12 +42,12 @@ class MCObjectWriter {
void operator=(const MCObjectWriter &) = delete;
protected:
- raw_ostream &OS;
+ raw_pwrite_stream &OS;
unsigned IsLittleEndian : 1;
protected: // Can only create subclasses.
- MCObjectWriter(raw_ostream &OS, bool IsLittleEndian)
+ MCObjectWriter(raw_pwrite_stream &OS, bool IsLittleEndian)
: OS(OS), IsLittleEndian(IsLittleEndian) {}
public:
@@ -91,12 +92,12 @@ public:
const MCSymbolRefExpr *B,
bool InSet) const;
- virtual bool
- IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
- const MCSymbolData &DataA,
- const MCFragment &FB,
- bool InSet,
- bool IsPCRel) const;
+ virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCSymbolData *DataB,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const;
/// \brief True if this symbol (which is a variable) is weak. This is not
/// just STB_WEAK, but more generally whether or not we can evaluate
@@ -120,33 +121,27 @@ public:
}
void WriteLE16(uint16_t Value) {
- Write8(uint8_t(Value >> 0));
- Write8(uint8_t(Value >> 8));
+ support::endian::Writer<support::little>(OS).write(Value);
}
void WriteLE32(uint32_t Value) {
- WriteLE16(uint16_t(Value >> 0));
- WriteLE16(uint16_t(Value >> 16));
+ support::endian::Writer<support::little>(OS).write(Value);
}
void WriteLE64(uint64_t Value) {
- WriteLE32(uint32_t(Value >> 0));
- WriteLE32(uint32_t(Value >> 32));
+ support::endian::Writer<support::little>(OS).write(Value);
}
void WriteBE16(uint16_t Value) {
- Write8(uint8_t(Value >> 8));
- Write8(uint8_t(Value >> 0));
+ support::endian::Writer<support::big>(OS).write(Value);
}
void WriteBE32(uint32_t Value) {
- WriteBE16(uint16_t(Value >> 16));
- WriteBE16(uint16_t(Value >> 0));
+ support::endian::Writer<support::big>(OS).write(Value);
}
void WriteBE64(uint64_t Value) {
- WriteBE32(uint32_t(Value >> 32));
- WriteBE32(uint32_t(Value >> 0));
+ support::endian::Writer<support::big>(OS).write(Value);
}
void Write16(uint16_t Value) {
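The byte-at-a-time shifting above is replaced by the endian stream writer from Support/EndianStream.h; a minimal sketch of that helper used directly, with any raw_ostream as the destination:

// Hedged sketch (C++): write a 32-bit little-endian value via the endian Writer.
void writeLE32(llvm::raw_ostream &OS, uint32_t Value) {
  llvm::support::endian::Writer<llvm::support::little>(OS).write(Value);
}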
diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h
index 2f681d6..62d39b2 100644
--- a/include/llvm/MC/MCParser/AsmLexer.h
+++ b/include/llvm/MC/MCParser/AsmLexer.h
@@ -40,7 +40,7 @@ protected:
public:
AsmLexer(const MCAsmInfo &MAI);
- ~AsmLexer();
+ ~AsmLexer() override;
void setBuffer(StringRef Buf, const char *ptr = nullptr);
diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h
index b6ec1d8..237f6d3 100644
--- a/include/llvm/MC/MCSectionCOFF.h
+++ b/include/llvm/MC/MCSectionCOFF.h
@@ -53,7 +53,7 @@ class MCSymbol;
assert ((Characteristics & 0x00F00000) == 0 &&
"alignment must not be set upon section creation");
}
- ~MCSectionCOFF();
+ ~MCSectionCOFF() override;
public:
/// ShouldOmitSectionDirective - Decides whether a '.section' directive
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 434a5b6..9efe102 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -39,7 +39,7 @@ class MCSectionELF : public MCSection {
/// below.
unsigned Flags;
- bool Unique;
+ unsigned UniqueID;
/// EntrySize - The size of each entry in this section. This size only
/// makes sense for sections that contain fixed-sized entries. If a
@@ -48,14 +48,18 @@ class MCSectionELF : public MCSection {
const MCSymbol *Group;
+ /// Depending on the type of the section this is sh_link or sh_info.
+ const MCSectionELF *Associated;
+
private:
friend class MCContext;
MCSectionELF(StringRef Section, unsigned type, unsigned flags, SectionKind K,
- unsigned entrySize, const MCSymbol *group, bool Unique,
- MCSymbol *Begin)
+ unsigned entrySize, const MCSymbol *group, unsigned UniqueID,
+ MCSymbol *Begin, const MCSectionELF *Associated)
: MCSection(SV_ELF, K, Begin), SectionName(Section), Type(type),
- Flags(flags), Unique(Unique), EntrySize(entrySize), Group(group) {}
- ~MCSectionELF();
+ Flags(flags), UniqueID(UniqueID), EntrySize(entrySize), Group(group),
+ Associated(Associated) {}
+ ~MCSectionELF() override;
void setSectionName(StringRef Name) { SectionName = Name; }
@@ -76,6 +80,11 @@ public:
bool UseCodeAlign() const override;
bool isVirtualSection() const override;
+ bool isUnique() const { return UniqueID != ~0U; }
+ unsigned getUniqueID() const { return UniqueID; }
+
+ const MCSectionELF *getAssociatedSection() const { return Associated; }
+
static bool classof(const MCSection *S) {
return S->getVariant() == SV_ELF;
}
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index df7610b..ad2bbcf 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -90,7 +90,7 @@ public:
class AArch64TargetStreamer : public MCTargetStreamer {
public:
AArch64TargetStreamer(MCStreamer &S);
- ~AArch64TargetStreamer();
+ ~AArch64TargetStreamer() override;
void finish() override;
@@ -115,7 +115,7 @@ private:
class ARMTargetStreamer : public MCTargetStreamer {
public:
ARMTargetStreamer(MCStreamer &S);
- ~ARMTargetStreamer();
+ ~ARMTargetStreamer() override;
virtual void emitFnStart();
virtual void emitFnEnd();
@@ -746,7 +746,8 @@ MCStreamer *createNullStreamer(MCContext &Ctx);
///
/// \param ShowInst - Whether to show the MCInst representation inline with
/// the assembly.
-MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+MCStreamer *createAsmStreamer(MCContext &Ctx,
+ std::unique_ptr<formatted_raw_ostream> OS,
bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst);
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index 53443b0..e4bdfda 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -15,6 +15,7 @@
#define LLVM_MC_MCSYMBOL_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -42,8 +43,9 @@ namespace llvm {
/// Section - The section the symbol is defined in. This is null for
/// undefined symbols, and the special AbsolutePseudoSection value for
- /// absolute symbols.
- const MCSection *Section;
+ /// absolute symbols. If this is a variable symbol, this caches the
+ /// variable value's section.
+ mutable const MCSection *Section;
/// Value - If non-null, the value for a variable symbol.
const MCExpr *Value;
@@ -68,6 +70,12 @@ namespace llvm {
MCSymbol(const MCSymbol&) = delete;
void operator=(const MCSymbol&) = delete;
+ const MCSection *getSectionPtr() const {
+ if (Section || !Value)
+ return Section;
+ return Section = Value->FindAssociatedSection();
+ }
+
public:
/// getName - Get the symbol name.
StringRef getName() const { return Name; }
@@ -103,7 +111,7 @@ namespace llvm {
///
/// Defined symbols are either absolute or in some section.
bool isDefined() const {
- return Section != nullptr;
+ return getSectionPtr() != nullptr;
}
/// isInSection - Check if this symbol is defined in some section (i.e., it
@@ -119,27 +127,27 @@ namespace llvm {
/// isAbsolute - Check if this is an absolute symbol.
bool isAbsolute() const {
- return Section == AbsolutePseudoSection;
+ return getSectionPtr() == AbsolutePseudoSection;
}
/// getSection - Get the section associated with a defined, non-absolute
/// symbol.
const MCSection &getSection() const {
assert(isInSection() && "Invalid accessor!");
- return *Section;
+ return *getSectionPtr();
}
/// setSection - Mark the symbol as defined in the section \p S.
- void setSection(const MCSection &S) { Section = &S; }
+ void setSection(const MCSection &S) {
+ assert(!isVariable() && "Cannot set section of variable");
+ Section = &S;
+ }
/// setUndefined - Mark the symbol as undefined.
void setUndefined() {
Section = nullptr;
}
- /// setAbsolute - Mark the symbol as absolute.
- void setAbsolute() { Section = AbsolutePseudoSection; }
-
/// @}
/// @name Variable Symbols
/// @{
diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h
index 8412b1d..36db391 100644
--- a/include/llvm/MC/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCTargetAsmParser.h
@@ -75,8 +75,6 @@ struct ParseInstructionInfo {
ParseInstructionInfo() : AsmRewrites(nullptr) {}
ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
: AsmRewrites(rewrites) {}
-
- ~ParseInstructionInfo() {}
};
/// MCTargetAsmParser - Generic interface to target specific assembly parsers.
@@ -110,7 +108,7 @@ protected: // Can only create subclasses.
MCTargetOptions MCOptions;
public:
- virtual ~MCTargetAsmParser();
+ ~MCTargetAsmParser() override;
uint64_t getAvailableFeatures() const { return AvailableFeatures; }
void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h
index 956e436..e2e95c7 100644
--- a/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -16,6 +16,7 @@ class MCFixup;
class MCObjectWriter;
class MCValue;
class raw_ostream;
+class raw_pwrite_stream;
class MCWinCOFFObjectTargetWriter {
virtual void anchor();
@@ -40,7 +41,7 @@ class raw_ostream;
/// \param OS - The stream to write to.
/// \returns The constructed object writer.
MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
- raw_ostream &OS);
+ raw_pwrite_stream &OS);
} // End llvm namespace
#endif
diff --git a/include/llvm/MC/MCWinCOFFStreamer.h b/include/llvm/MC/MCWinCOFFStreamer.h
index 57a75ce..b3fee81 100644
--- a/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/include/llvm/MC/MCWinCOFFStreamer.h
@@ -24,11 +24,12 @@ class MCSubtargetInfo;
class MCSymbol;
class StringRef;
class raw_ostream;
+class raw_pwrite_stream;
class MCWinCOFFStreamer : public MCObjectStreamer {
public:
MCWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, MCCodeEmitter &CE,
- raw_ostream &OS);
+ raw_pwrite_stream &OS);
/// state management
void reset() override {
diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h
index bfecb8b..6f195d7 100644
--- a/include/llvm/MC/SubtargetFeature.h
+++ b/include/llvm/MC/SubtargetFeature.h
@@ -78,7 +78,7 @@ public:
std::string getString() const;
/// Adding Features.
- void AddFeature(StringRef String);
+ void AddFeature(StringRef String, bool Enable = true);
/// ToggleFeature - Toggle a feature and returns the newly updated feature
/// bits.
diff --git a/include/llvm/Object/IRObjectFile.h b/include/llvm/Object/IRObjectFile.h
index 74f4666..ef65528 100644
--- a/include/llvm/Object/IRObjectFile.h
+++ b/include/llvm/Object/IRObjectFile.h
@@ -31,7 +31,7 @@ class IRObjectFile : public SymbolicFile {
public:
IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> M);
- ~IRObjectFile();
+ ~IRObjectFile() override;
void moveSymbolNext(DataRefImpl &Symb) const override;
std::error_code printSymbolName(raw_ostream &OS,
DataRefImpl Symb) const override;
diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h
index b6a8b4d..114b229 100644
--- a/include/llvm/Object/SymbolicFile.h
+++ b/include/llvm/Object/SymbolicFile.h
@@ -118,7 +118,7 @@ const uint64_t UnknownAddressOrSize = ~0ULL;
class SymbolicFile : public Binary {
public:
- virtual ~SymbolicFile();
+ ~SymbolicFile() override;
SymbolicFile(unsigned int Type, MemoryBufferRef Source);
// virtual interface.
diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h
index 9a09309..23b0451 100644
--- a/include/llvm/Option/ArgList.h
+++ b/include/llvm/Option/ArgList.h
@@ -320,7 +320,7 @@ private:
public:
InputArgList(const char* const *ArgBegin, const char* const *ArgEnd);
- ~InputArgList();
+ ~InputArgList() override;
const char *getArgString(unsigned Index) const override {
return ArgStrings[Index];
@@ -355,7 +355,7 @@ class DerivedArgList : public ArgList {
public:
/// Construct a new derived arg list from \p BaseArgs.
DerivedArgList(const InputArgList &BaseArgs);
- ~DerivedArgList();
+ ~DerivedArgList() override;
const char *getArgString(unsigned Index) const override {
return BaseArgs.getArgString(Index);
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index 6e92d96..3c4d838 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -250,7 +250,7 @@ public:
explicit ModulePass(char &pid) : Pass(PT_Module, pid) {}
// Force out-of-line virtual method.
- virtual ~ModulePass();
+ ~ModulePass() override;
};
@@ -279,7 +279,7 @@ public:
: ModulePass(pid) {}
// Force out-of-line virtual method.
- virtual ~ImmutablePass();
+ ~ImmutablePass() override;
};
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index de31771..94cf3e8 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -203,7 +203,8 @@ public:
}
/// \brief Allocate space at the specified alignment.
- LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, size_t Alignment) {
+ LLVM_ATTRIBUTE_RETURNS_NONNULL LLVM_ATTRIBUTE_RETURNS_NOALIAS void *
+ Allocate(size_t Size, size_t Alignment) {
assert(Alignment > 0 && "0-byte alignment is not allowed. Use 1 instead.");
// Keep track of how many bytes we've allocated.
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index f7de840..c81fbaf 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -72,20 +72,20 @@
#define LLVM_NOEXCEPT
#endif
-/// \brief Does the compiler support r-value reference *this?
+/// \brief Does the compiler support ref-qualifiers for *this?
///
-/// Sadly, this is separate from just r-value reference support because GCC
-/// implemented this later than everything else.
+/// Sadly, this is separate from just rvalue reference support because GCC
+/// and MSVC implemented this later than everything else.
#if __has_feature(cxx_rvalue_references) || LLVM_GNUC_PREREQ(4, 8, 1)
#define LLVM_HAS_RVALUE_REFERENCE_THIS 1
#else
#define LLVM_HAS_RVALUE_REFERENCE_THIS 0
#endif
-/// Expands to '&' if r-value references are supported.
+/// Expands to '&' if ref-qualifiers for *this are supported.
///
-/// This can be used to provide l-value/r-value overrides of member functions.
-/// The r-value override should be guarded by LLVM_HAS_RVALUE_REFERENCE_THIS
+/// This can be used to provide lvalue/rvalue overrides of member functions.
+/// The rvalue override should be guarded by LLVM_HAS_RVALUE_REFERENCE_THIS
#if LLVM_HAS_RVALUE_REFERENCE_THIS
#define LLVM_LVALUE_FUNCTION &
#else
@@ -223,6 +223,16 @@
#define LLVM_ATTRIBUTE_RETURNS_NONNULL
#endif
+/// \macro LLVM_ATTRIBUTE_RETURNS_NOALIAS Used to mark a function as returning a
+/// pointer that does not alias any other valid pointer.
+#ifdef __GNUC__
+#define LLVM_ATTRIBUTE_RETURNS_NOALIAS __attribute__((__malloc__))
+#elif defined(_MSC_VER)
+#define LLVM_ATTRIBUTE_RETURNS_NOALIAS __declspec(restrict)
+#else
+#define LLVM_ATTRIBUTE_RETURNS_NOALIAS
+#endif
+
/// LLVM_EXTENSION - Support compilers where we have a keyword to suppress
/// pedantic diagnostics.
#ifdef __GNUC__
@@ -281,6 +291,37 @@
# define LLVM_ASSUME_ALIGNED(p, a) (p)
#endif
+/// \macro LLVM_ALIGNAS
+/// \brief Used to specify a minimum alignment for a structure or variable. The
+/// alignment must be a constant integer. Use LLVM_PTR_SIZE to compute
+/// alignments in terms of the size of a pointer.
+///
+/// Note that __declspec(align) has special quirks: it's not legal to pass a
+/// structure with __declspec(align) as a formal parameter.
+#ifdef _MSC_VER
+# define LLVM_ALIGNAS(x) __declspec(align(x))
+#elif __GNUC__ && !__has_feature(cxx_alignas) && !LLVM_GNUC_PREREQ(4, 8, 0)
+# define LLVM_ALIGNAS(x) __attribute__((aligned(x)))
+#else
+# define LLVM_ALIGNAS(x) alignas(x)
+#endif
+
+/// \macro LLVM_PTR_SIZE
+/// \brief A constant integer equivalent to the value of sizeof(void*).
+/// Generally used in combination with LLVM_ALIGNAS or when doing computation in
+/// the preprocessor.
+#ifdef __SIZEOF_POINTER__
+# define LLVM_PTR_SIZE __SIZEOF_POINTER__
+#elif defined(_WIN64)
+# define LLVM_PTR_SIZE 8
+#elif defined(_WIN32)
+# define LLVM_PTR_SIZE 4
+#elif defined(_MSC_VER)
+# error "could not determine LLVM_PTR_SIZE as a constant int for MSVC"
+#else
+# define LLVM_PTR_SIZE sizeof(void *)
+#endif
+
/// \macro LLVM_FUNCTION_NAME
/// \brief Expands to __func__ on compilers which support it. Otherwise,
/// expands to a compiler-dependent replacement.
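LLVM_ALIGNAS and LLVM_PTR_SIZE above are designed to be combined, for example to align a structure to a multiple of the pointer width. A minimal sketch under C++11, using simplified stand-ins for the two macros (the real definitions above also cover MSVC and pre-4.8 GCC):

#include <cstdio>

#ifdef __SIZEOF_POINTER__
#define MY_PTR_SIZE __SIZEOF_POINTER__
#else
#define MY_PTR_SIZE sizeof(void *)
#endif

#define MY_ALIGNAS(x) alignas(x)

// Align a two-pointer node to twice the pointer width.
struct MY_ALIGNAS(2 * MY_PTR_SIZE) Node {
  void *Prev;
  void *Next;
};

int main() {
  std::printf("pointer size: %d, alignof(Node): %zu\n",
              (int)MY_PTR_SIZE, alignof(Node));
}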
diff --git a/include/llvm/Support/ELFRelocs/Mips.def b/include/llvm/Support/ELFRelocs/Mips.def
index dc57346..0e6de3b 100644
--- a/include/llvm/Support/ELFRelocs/Mips.def
+++ b/include/llvm/Support/ELFRelocs/Mips.def
@@ -110,3 +110,4 @@ ELF_RELOC(R_MICROMIPS_GPREL7_S2, 172)
ELF_RELOC(R_MICROMIPS_PC23_S2, 173)
ELF_RELOC(R_MIPS_NUM, 218)
ELF_RELOC(R_MIPS_PC32, 248)
+ELF_RELOC(R_MIPS_EH, 249)
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index b3b44c4..a736c32 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -120,7 +120,7 @@ class UniqueID {
uint64_t File;
public:
- UniqueID() {}
+ UniqueID() = default;
UniqueID(uint64_t Device, uint64_t File) : Device(Device), File(File) {}
bool operator==(const UniqueID &Other) const {
return Device == Other.Device && File == Other.File;
diff --git a/include/llvm/Support/FormattedStream.h b/include/llvm/Support/FormattedStream.h
index 8137daa..4a135cd 100644
--- a/include/llvm/Support/FormattedStream.h
+++ b/include/llvm/Support/FormattedStream.h
@@ -25,27 +25,11 @@ namespace llvm {
/// boundaries and querying the number of lines written to the stream.
///
class formatted_raw_ostream : public raw_ostream {
-public:
- /// DELETE_STREAM - Tell the destructor to delete the held stream.
- ///
- static const bool DELETE_STREAM = true;
-
- /// PRESERVE_STREAM - Tell the destructor to not delete the held
- /// stream.
- ///
- static const bool PRESERVE_STREAM = false;
-
-private:
/// TheStream - The real stream we output to. We set it to be
/// unbuffered, since we're already doing our own buffering.
///
raw_ostream *TheStream;
- /// DeleteStream - Do we need to delete TheStream in the
- /// destructor?
- ///
- bool DeleteStream;
-
/// Position - The current output column and line of the data that's
/// been flushed and the portion of the buffer that's been
/// scanned. The line and column scheme is zero-based.
@@ -73,6 +57,24 @@ private:
///
void ComputePosition(const char *Ptr, size_t size);
+ void setStream(raw_ostream &Stream) {
+ releaseStream();
+
+ TheStream = &Stream;
+
+ // This formatted_raw_ostream inherits from raw_ostream, so it'll do its
+ // own buffering, and it doesn't need or want TheStream to do another
+ // layer of buffering underneath. Resize the buffer to what TheStream
+ // had been using, and tell TheStream not to do its own buffering.
+ if (size_t BufferSize = TheStream->GetBufferSize())
+ SetBufferSize(BufferSize);
+ else
+ SetUnbuffered();
+ TheStream->SetUnbuffered();
+
+ Scanned = nullptr;
+ }
+
public:
/// formatted_raw_ostream - Open the specified file for
/// writing. If an error occurs, information about the error is
@@ -84,39 +86,19 @@ public:
/// so it doesn't want another layer of buffering to be happening
/// underneath it.
///
- formatted_raw_ostream(raw_ostream &Stream, bool Delete = false)
- : raw_ostream(), TheStream(nullptr), DeleteStream(false), Position(0, 0) {
- setStream(Stream, Delete);
+ formatted_raw_ostream(raw_ostream &Stream)
+ : TheStream(nullptr), Position(0, 0) {
+ setStream(Stream);
}
- explicit formatted_raw_ostream()
- : raw_ostream(), TheStream(nullptr), DeleteStream(false), Position(0, 0) {
+ explicit formatted_raw_ostream() : TheStream(nullptr), Position(0, 0) {
Scanned = nullptr;
}
- ~formatted_raw_ostream() {
+ ~formatted_raw_ostream() override {
flush();
releaseStream();
}
- void setStream(raw_ostream &Stream, bool Delete = false) {
- releaseStream();
-
- TheStream = &Stream;
- DeleteStream = Delete;
-
- // This formatted_raw_ostream inherits from raw_ostream, so it'll do its
- // own buffering, and it doesn't need or want TheStream to do another
- // layer of buffering underneath. Resize the buffer to what TheStream
- // had been using, and tell TheStream not to do its own buffering.
- if (size_t BufferSize = TheStream->GetBufferSize())
- SetBufferSize(BufferSize);
- else
- SetUnbuffered();
- TheStream->SetUnbuffered();
-
- Scanned = nullptr;
- }
-
/// PadToColumn - Align the output to some column number. If the current
/// column is already equal to or more than NewCol, PadToColumn inserts one
/// space.
@@ -151,13 +133,11 @@ public:
private:
void releaseStream() {
- // Delete the stream if needed. Otherwise, transfer the buffer
- // settings from this raw_ostream back to the underlying stream.
+ // Transfer the buffer settings from this raw_ostream back to the underlying
+ // stream.
if (!TheStream)
return;
- if (DeleteStream)
- delete TheStream;
- else if (size_t BufferSize = GetBufferSize())
+ if (size_t BufferSize = GetBufferSize())
TheStream->SetBufferSize(BufferSize);
else
TheStream->SetUnbuffered();
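With DELETE_STREAM/PRESERVE_STREAM gone, formatted_raw_ostream never owns the wrapped stream, and setStream is now an implementation detail. A short usage sketch, assuming an LLVM tree with this patch applied; the caller keeps the underlying stream alive for the wrapper's lifetime:

#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  formatted_raw_ostream FOS(outs()); // wraps, but does not own, outs()
  FOS << "movq";
  FOS.PadToColumn(16);               // column tracking is the point of the wrapper
  FOS << "; comment\n";
  FOS.flush();                       // buffer settings go back to outs() on destruction
}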
diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h
index 0998eb9..63678bb 100644
--- a/include/llvm/Support/GenericDomTree.h
+++ b/include/llvm/Support/GenericDomTree.h
@@ -243,6 +243,8 @@ protected:
this->Roots.clear();
Vertex.clear();
RootNode = nullptr;
+ DFSInfoValid = false;
+ SlowQueries = 0;
}
// NewBB is split and now it has one successor. Update dominator tree to
@@ -637,9 +639,38 @@ protected:
friend void
Calculate(DominatorTreeBase<typename GraphTraits<N>::NodeType> &DT, FuncT &F);
+
+ DomTreeNodeBase<NodeT> *getNodeForBlock(NodeT *BB) {
+ if (DomTreeNodeBase<NodeT> *Node = getNode(BB))
+ return Node;
+
+ // Haven't calculated this node yet? Get or calculate the node for the
+ // immediate dominator.
+ NodeT *IDom = getIDom(BB);
+
+ assert(IDom || this->DomTreeNodes[nullptr]);
+ DomTreeNodeBase<NodeT> *IDomNode = getNodeForBlock(IDom);
+
+ // Add a new tree node for this NodeT, and link it as a child of
+ // IDomNode
+ return (this->DomTreeNodes[BB] = IDomNode->addChild(
+ llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode))).get();
+ }
+
+ NodeT *getIDom(NodeT *BB) const { return IDoms.lookup(BB); }
+
+ void addRoot(NodeT *BB) { this->Roots.push_back(BB); }
+
+public:
/// updateDFSNumbers - Assign In and Out numbers to the nodes while walking
/// dominator tree in dfs order.
void updateDFSNumbers() const {
+
+ if (DFSInfoValid) {
+ SlowQueries = 0;
+ return;
+ }
+
unsigned DFSNum = 0;
SmallVector<std::pair<const DomTreeNodeBase<NodeT> *,
@@ -682,28 +713,6 @@ protected:
DFSInfoValid = true;
}
- DomTreeNodeBase<NodeT> *getNodeForBlock(NodeT *BB) {
- if (DomTreeNodeBase<NodeT> *Node = getNode(BB))
- return Node;
-
- // Haven't calculated this node yet? Get or calculate the node for the
- // immediate dominator.
- NodeT *IDom = getIDom(BB);
-
- assert(IDom || this->DomTreeNodes[nullptr]);
- DomTreeNodeBase<NodeT> *IDomNode = getNodeForBlock(IDom);
-
- // Add a new tree node for this NodeT, and link it as a child of
- // IDomNode
- return (this->DomTreeNodes[BB] = IDomNode->addChild(
- llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode))).get();
- }
-
- NodeT *getIDom(NodeT *BB) const { return IDoms.lookup(BB); }
-
- void addRoot(NodeT *BB) { this->Roots.push_back(BB); }
-
-public:
/// recalculate - compute a dominator tree for the given function
template <class FT> void recalculate(FT &F) {
typedef GraphTraits<FT *> TraitsTy;
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 388d82c..7edc2ac 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -318,31 +318,31 @@ inline bool isIntN(unsigned N, int64_t x) {
return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
}
-/// isMask_32 - This function returns true if the argument is a sequence of ones
-/// starting at the least significant bit with the remainder zero (32 bit
-/// version). Ex. isMask_32(0x0000FFFFU) == true.
+/// isMask_32 - This function returns true if the argument is a non-empty
+/// sequence of ones starting at the least significant bit with the remainder
+/// zero (32 bit version). Ex. isMask_32(0x0000FFFFU) == true.
inline bool isMask_32(uint32_t Value) {
return Value && ((Value + 1) & Value) == 0;
}
-/// isMask_64 - This function returns true if the argument is a sequence of ones
-/// starting at the least significant bit with the remainder zero (64 bit
-/// version).
+/// isMask_64 - This function returns true if the argument is a non-empty
+/// sequence of ones starting at the least significant bit with the remainder
+/// zero (64 bit version).
inline bool isMask_64(uint64_t Value) {
return Value && ((Value + 1) & Value) == 0;
}
/// isShiftedMask_32 - This function returns true if the argument contains a
-/// sequence of ones with the remainder zero (32 bit version.)
+/// non-empty sequence of ones with the remainder zero (32 bit version.)
/// Ex. isShiftedMask_32(0x0000FF00U) == true.
inline bool isShiftedMask_32(uint32_t Value) {
- return isMask_32((Value - 1) | Value);
+ return Value && isMask_32((Value - 1) | Value);
}
/// isShiftedMask_64 - This function returns true if the argument contains a
-/// sequence of ones with the remainder zero (64 bit version.)
+/// non-empty sequence of ones with the remainder zero (64 bit version.)
inline bool isShiftedMask_64(uint64_t Value) {
- return isMask_64((Value - 1) | Value);
+ return Value && isMask_64((Value - 1) | Value);
}
/// isPowerOf2_32 - This function returns true if the argument is a power of
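The tightened semantics are easy to check in isolation; the helpers below simply restate the new definitions from this hunk, so no LLVM headers are needed:

#include <cassert>
#include <cstdint>

static bool isMask_32(uint32_t Value) {
  return Value && ((Value + 1) & Value) == 0;
}
static bool isShiftedMask_32(uint32_t Value) {
  return Value && isMask_32((Value - 1) | Value);
}

int main() {
  assert(isMask_32(0x0000FFFFu));
  assert(isShiftedMask_32(0x0000FF00u));
  assert(!isMask_32(0));
  // The added "Value &&" matters here: the old isShiftedMask_32(0) evaluated
  // isMask_32(0xFFFFFFFF) and returned true for an empty sequence of ones.
  assert(!isShiftedMask_32(0));
}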
diff --git a/include/llvm/Support/OnDiskHashTable.h b/include/llvm/Support/OnDiskHashTable.h
index 52f133c..0f097f2 100644
--- a/include/llvm/Support/OnDiskHashTable.h
+++ b/include/llvm/Support/OnDiskHashTable.h
@@ -75,13 +75,10 @@ template <typename Info> class OnDiskChainedHashTableGenerator {
llvm::SpecificBumpPtrAllocator<Item> BA;
/// \brief A linked list of values in a particular hash bucket.
- class Bucket {
- public:
+ struct Bucket {
offset_type Off;
- Item *Head;
unsigned Length;
-
- Bucket() {}
+ Item *Head;
};
Bucket *Buckets;
diff --git a/include/llvm/Support/Options.h b/include/llvm/Support/Options.h
index 4fd1bff..2742d39 100644
--- a/include/llvm/Support/Options.h
+++ b/include/llvm/Support/Options.h
@@ -61,7 +61,7 @@ char OptionKey<ValT, Base, Mem>::ID = 0;
/// The OptionRegistry is responsible for managing lifetimes of the options and
/// provides interfaces for option registration and reading values from options.
/// This object is a singleton, only one instance should ever exist so that all
-/// options are registered in teh same place.
+/// options are registered in the same place.
class OptionRegistry {
private:
DenseMap<void *, cl::Option *> Options;
diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h
index a067b13..7e165d7 100644
--- a/include/llvm/Support/Signals.h
+++ b/include/llvm/Support/Signals.h
@@ -39,7 +39,7 @@ namespace sys {
/// When an error signal (such as SIGABRT or SIGSEGV) is delivered to the
/// process, print a stack trace and then exit.
/// @brief Print a stack trace if a fatal signal occurs.
- void PrintStackTraceOnErrorSignal();
+ void PrintStackTraceOnErrorSignal(bool DisableCrashReporting = false);
/// Disable all system dialog boxes that appear when the process crashes.
void DisableSystemDialogsOnCrash();
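A typical tool entry point after this change, assuming an LLVM build that contains this patch; the defaulted argument keeps existing callers source-compatible:

#include "llvm/Support/Signals.h"

int main() {
  // Same behavior as before by default; pass true to also suppress the OS
  // crash-reporting machinery on platforms that support it.
  llvm::sys::PrintStackTraceOnErrorSignal();
  llvm::sys::DisableSystemDialogsOnCrash();
  // ... tool body ...
  return 0;
}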
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h
index ba6bbde..a4bf51f 100644
--- a/include/llvm/Support/TargetRegistry.h
+++ b/include/llvm/Support/TargetRegistry.h
@@ -22,6 +22,7 @@
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/FormattedStream.h"
#include <cassert>
#include <memory>
#include <string>
@@ -49,20 +50,22 @@ namespace llvm {
class TargetMachine;
class TargetOptions;
class raw_ostream;
+ class raw_pwrite_stream;
class formatted_raw_ostream;
MCStreamer *createNullStreamer(MCContext &Ctx);
- MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ MCStreamer *createAsmStreamer(MCContext &Ctx,
+ std::unique_ptr<formatted_raw_ostream> OS,
bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst);
/// Takes ownership of \p TAB and \p CE.
MCStreamer *createELFStreamer(MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *CE,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE,
bool RelaxAll);
MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *CE,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE,
bool RelaxAll, bool DWARFMustBeAtTheEnd,
bool LabelSections = false);
@@ -124,24 +127,24 @@ namespace llvm {
typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx);
- typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Target &T,
+ typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
typedef MCCodeEmitter *(*MCCodeEmitterCtorTy)(const MCInstrInfo &II,
const MCRegisterInfo &MRI,
MCContext &Ctx);
typedef MCStreamer *(*ELFStreamerCtorTy)(const Triple &T, MCContext &Ctx,
- MCAsmBackend &TAB, raw_ostream &OS,
+ MCAsmBackend &TAB,
+ raw_pwrite_stream &OS,
MCCodeEmitter *Emitter,
bool RelaxAll);
typedef MCStreamer *(*MachOStreamerCtorTy)(
- MCContext &Ctx, MCAsmBackend &TAB, raw_ostream &OS,
+ MCContext &Ctx, MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll, bool DWARFMustBeAtTheEnd);
typedef MCStreamer *(*COFFStreamerCtorTy)(MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS,
+ raw_pwrite_stream &OS,
MCCodeEmitter *Emitter,
bool RelaxAll);
typedef MCTargetStreamer *(*NullTargetStreamerCtorTy)(MCStreamer &S);
@@ -409,14 +412,13 @@ namespace llvm {
return MCDisassemblerCtorFn(*this, STI, Ctx);
}
- MCInstPrinter *createMCInstPrinter(unsigned SyntaxVariant,
+ MCInstPrinter *createMCInstPrinter(const Triple &T, unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) const {
+ const MCRegisterInfo &MRI) const {
if (!MCInstPrinterCtorFn)
return nullptr;
- return MCInstPrinterCtorFn(*this, SyntaxVariant, MAI, MII, MRI, STI);
+ return MCInstPrinterCtorFn(T, SyntaxVariant, MAI, MII, MRI);
}
@@ -438,7 +440,7 @@ namespace llvm {
/// \param Emitter The target independent assembler object.Takes ownership.
/// \param RelaxAll Relax all fixups?
MCStreamer *createMCObjectStreamer(const Triple &T, MCContext &Ctx,
- MCAsmBackend &TAB, raw_ostream &OS,
+ MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter,
const MCSubtargetInfo &STI,
bool RelaxAll,
@@ -471,14 +473,16 @@ namespace llvm {
return S;
}
- MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ MCStreamer *createAsmStreamer(MCContext &Ctx,
+ std::unique_ptr<formatted_raw_ostream> OS,
bool IsVerboseAsm, bool UseDwarfDirectory,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst) const {
- MCStreamer *S =
- llvm::createAsmStreamer(Ctx, OS, IsVerboseAsm, UseDwarfDirectory,
- InstPrint, CE, TAB, ShowInst);
- createAsmTargetStreamer(*S, OS, InstPrint, IsVerboseAsm);
+ formatted_raw_ostream &OSRef = *OS;
+ MCStreamer *S = llvm::createAsmStreamer(Ctx, std::move(OS), IsVerboseAsm,
+ UseDwarfDirectory, InstPrint, CE,
+ TAB, ShowInst);
+ createAsmTargetStreamer(*S, OSRef, InstPrint, IsVerboseAsm);
return S;
}
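The createMCInstPrinter change ripples into every caller: the Triple is now passed explicitly and the MCSubtargetInfo argument is gone. A compile-level sketch of the new call shape, assuming this patch is applied (makePrinter and its parameters are illustrative, not part of LLVM):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;

MCInstPrinter *makePrinter(const Target &T, StringRef TripleName,
                           unsigned SyntaxVariant, const MCAsmInfo &MAI,
                           const MCInstrInfo &MII, const MCRegisterInfo &MRI) {
  return T.createMCInstPrinter(Triple(TripleName), SyntaxVariant, MAI, MII, MRI);
}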
diff --git a/include/llvm/Support/ToolOutputFile.h b/include/llvm/Support/ToolOutputFile.h
index d98e7bb..1be26c2 100644
--- a/include/llvm/Support/ToolOutputFile.h
+++ b/include/llvm/Support/ToolOutputFile.h
@@ -18,16 +18,16 @@
namespace llvm {
-/// tool_output_file - This class contains a raw_fd_ostream and adds a
-/// few extra features commonly needed for compiler-like tool output files:
+/// This class contains a raw_fd_ostream and adds a few extra features commonly
+/// needed for compiler-like tool output files:
/// - The file is automatically deleted if the process is killed.
/// - The file is automatically deleted when the tool_output_file
/// object is destroyed unless the client calls keep().
class tool_output_file {
- /// Installer - This class is declared before the raw_fd_ostream so that
- /// it is constructed before the raw_fd_ostream is constructed and
- /// destructed after the raw_fd_ostream is destructed. It installs
- /// cleanups in its constructor and uninstalls them in its destructor.
+ /// This class is declared before the raw_fd_ostream so that it is constructed
+ /// before the raw_fd_ostream is constructed and destructed after the
+ /// raw_fd_ostream is destructed. It installs cleanups in its constructor and
+ /// uninstalls them in its destructor.
class CleanupInstaller {
/// The name of the file.
std::string Filename;
@@ -39,8 +39,7 @@ class tool_output_file {
~CleanupInstaller();
} Installer;
- /// OS - The contained stream. This is intentionally declared after
- /// Installer.
+ /// The contained stream. This is intentionally declared after Installer.
raw_fd_ostream OS;
public:
@@ -51,11 +50,11 @@ public:
tool_output_file(StringRef Filename, int FD);
- /// os - Return the contained raw_fd_ostream.
+ /// Return the contained raw_fd_ostream.
raw_fd_ostream &os() { return OS; }
- /// keep - Indicate that the tool's job wrt this output file has been
- /// successful and the file should not be deleted.
+ /// Indicate that the tool's job wrt this output file has been successful and
+ /// the file should not be deleted.
void keep() { Installer.Keep = true; }
};
diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h
index 78829f8..672cf30 100644
--- a/include/llvm/Support/YAMLTraits.h
+++ b/include/llvm/Support/YAMLTraits.h
@@ -888,7 +888,7 @@ public:
void *Ctxt = nullptr,
SourceMgr::DiagHandlerTy DiagHandler = nullptr,
void *DiagHandlerCtxt = nullptr);
- ~Input();
+ ~Input() override;
// Check if there was a syntax or semantic error during parsing.
std::error_code error();
@@ -955,7 +955,7 @@ private:
};
class MapHNode : public HNode {
- virtual void anchor();
+ void anchor() override;
public:
MapHNode(Node *n) : HNode(n) { }
@@ -974,7 +974,7 @@ private:
};
class SequenceHNode : public HNode {
- virtual void anchor();
+ void anchor() override;
public:
SequenceHNode(Node *n) : HNode(n) { }
@@ -1020,7 +1020,7 @@ private:
class Output : public IO {
public:
Output(llvm::raw_ostream &, void *Ctxt=nullptr);
- virtual ~Output();
+ ~Output() override;
bool outputting() override;
bool mapTag(StringRef, bool) override;
diff --git a/include/llvm/Support/circular_raw_ostream.h b/include/llvm/Support/circular_raw_ostream.h
index ee7b89f..19f9c2c 100644
--- a/include/llvm/Support/circular_raw_ostream.h
+++ b/include/llvm/Support/circular_raw_ostream.h
@@ -107,30 +107,17 @@ namespace llvm
/// management of it, etc.
///
circular_raw_ostream(raw_ostream &Stream, const char *Header,
- size_t BuffSize = 0, bool Owns = REFERENCE_ONLY)
- : raw_ostream(/*unbuffered*/true),
- TheStream(nullptr),
- OwnsStream(Owns),
- BufferSize(BuffSize),
- BufferArray(nullptr),
- Filled(false),
- Banner(Header) {
+ size_t BuffSize = 0, bool Owns = REFERENCE_ONLY)
+ : raw_ostream(/*unbuffered*/ true), TheStream(nullptr),
+ OwnsStream(Owns), BufferSize(BuffSize), BufferArray(nullptr),
+ Filled(false), Banner(Header) {
if (BufferSize != 0)
BufferArray = new char[BufferSize];
Cur = BufferArray;
setStream(Stream, Owns);
}
- explicit circular_raw_ostream()
- : raw_ostream(/*unbuffered*/true),
- TheStream(nullptr),
- OwnsStream(REFERENCE_ONLY),
- BufferArray(nullptr),
- Filled(false),
- Banner("") {
- Cur = BufferArray;
- }
- ~circular_raw_ostream() {
+ ~circular_raw_ostream() override {
flush();
flushBufferWithBanner();
releaseStream();
diff --git a/include/llvm/Support/raw_os_ostream.h b/include/llvm/Support/raw_os_ostream.h
index 04cf3b6..a983aeb 100644
--- a/include/llvm/Support/raw_os_ostream.h
+++ b/include/llvm/Support/raw_os_ostream.h
@@ -34,7 +34,7 @@ class raw_os_ostream : public raw_ostream {
public:
raw_os_ostream(std::ostream &O) : OS(O) {}
- ~raw_os_ostream();
+ ~raw_os_ostream() override;
};
} // end llvm namespace
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index 12783c7..338f0f4 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -20,21 +20,20 @@
#include <system_error>
namespace llvm {
- class format_object_base;
- class FormattedString;
- class FormattedNumber;
- template <typename T>
- class SmallVectorImpl;
-
- namespace sys {
- namespace fs {
- enum OpenFlags : unsigned;
- }
- }
-
-/// raw_ostream - This class implements an extremely fast bulk output stream
-/// that can *only* output to a stream. It does not support seeking, reopening,
-/// rewinding, line buffered disciplines etc. It is a simple buffer that outputs
+class format_object_base;
+class FormattedString;
+class FormattedNumber;
+template <typename T> class SmallVectorImpl;
+
+namespace sys {
+namespace fs {
+enum OpenFlags : unsigned;
+}
+}
+
+/// This class implements an extremely fast bulk output stream that can *only*
+/// output to a stream. It does not support seeking, reopening, rewinding, line
+/// buffered disciplines etc. It is a simple buffer that outputs
/// a chunk at a time.
class raw_ostream {
private:
@@ -81,8 +80,8 @@ public:
SAVEDCOLOR
};
- explicit raw_ostream(bool unbuffered=false)
- : BufferMode(unbuffered ? Unbuffered : InternalBuffer) {
+ explicit raw_ostream(bool unbuffered = false)
+ : BufferMode(unbuffered ? Unbuffered : InternalBuffer) {
// Start out ready to flush.
OutBufStart = OutBufEnd = OutBufCur = nullptr;
}
@@ -96,12 +95,11 @@ public:
// Configuration Interface
//===--------------------------------------------------------------------===//
- /// SetBuffered - Set the stream to be buffered, with an automatically
- /// determined buffer size.
+ /// Set the stream to be buffered, with an automatically determined buffer
+ /// size.
void SetBuffered();
- /// SetBufferSize - Set the stream to be buffered, using the
- /// specified buffer size.
+ /// Set the stream to be buffered, using the specified buffer size.
void SetBufferSize(size_t Size) {
flush();
SetBufferAndMode(new char[Size], Size, InternalBuffer);
@@ -117,10 +115,9 @@ public:
return OutBufEnd - OutBufStart;
}
- /// SetUnbuffered - Set the stream to be unbuffered. When
- /// unbuffered, the stream will flush after every write. This routine
- /// will also flush the buffer immediately when the stream is being
- /// set to unbuffered.
+ /// Set the stream to be unbuffered. When unbuffered, the stream will flush
+ /// after every write. This routine will also flush the buffer immediately
+ /// when the stream is being set to unbuffered.
void SetUnbuffered() {
flush();
SetBufferAndMode(nullptr, 0, Unbuffered);
@@ -204,11 +201,11 @@ public:
raw_ostream &operator<<(double N);
- /// write_hex - Output \p N in hexadecimal, without any prefix or padding.
+ /// Output \p N in hexadecimal, without any prefix or padding.
raw_ostream &write_hex(unsigned long long N);
- /// write_escaped - Output \p Str, turning '\\', '\t', '\n', '"', and
- /// anything that doesn't satisfy std::isprint into an escape sequence.
+ /// Output \p Str, turning '\\', '\t', '\n', '"', and anything that doesn't
+ /// satisfy std::isprint into an escape sequence.
raw_ostream &write_escaped(StringRef Str, bool UseHexEscapes = false);
raw_ostream &write(unsigned char C);
@@ -263,8 +260,8 @@ public:
//===--------------------------------------------------------------------===//
private:
- /// write_impl - The is the piece of the class that is implemented
- /// by subclasses. This writes the \p Size bytes starting at
+ /// This is the piece of the class that is implemented by subclasses. This
+ /// writes the \p Size bytes starting at
/// \p Ptr to the underlying stream.
///
/// This function is guaranteed to only be called at a point at which it is
@@ -281,51 +278,58 @@ private:
// An out of line virtual method to provide a home for the class vtable.
virtual void handle();
- /// current_pos - Return the current position within the stream, not
- /// counting the bytes currently in the buffer.
+ /// Return the current position within the stream, not counting the bytes
+ /// currently in the buffer.
virtual uint64_t current_pos() const = 0;
protected:
- /// SetBuffer - Use the provided buffer as the raw_ostream buffer. This is
- /// intended for use only by subclasses which can arrange for the output to go
- /// directly into the desired output buffer, instead of being copied on each
- /// flush.
+ /// Use the provided buffer as the raw_ostream buffer. This is intended for
+ /// use only by subclasses which can arrange for the output to go directly
+ /// into the desired output buffer, instead of being copied on each flush.
void SetBuffer(char *BufferStart, size_t Size) {
SetBufferAndMode(BufferStart, Size, ExternalBuffer);
}
- /// preferred_buffer_size - Return an efficient buffer size for the
- /// underlying output mechanism.
+ /// Return an efficient buffer size for the underlying output mechanism.
virtual size_t preferred_buffer_size() const;
- /// getBufferStart - Return the beginning of the current stream buffer, or 0
- /// if the stream is unbuffered.
+ /// Return the beginning of the current stream buffer, or 0 if the stream is
+ /// unbuffered.
const char *getBufferStart() const { return OutBufStart; }
//===--------------------------------------------------------------------===//
// Private Interface
//===--------------------------------------------------------------------===//
private:
- /// SetBufferAndMode - Install the given buffer and mode.
+ /// Install the given buffer and mode.
void SetBufferAndMode(char *BufferStart, size_t Size, BufferKind Mode);
- /// flush_nonempty - Flush the current buffer, which is known to be
- /// non-empty. This outputs the currently buffered data and resets
- /// the buffer to empty.
+ /// Flush the current buffer, which is known to be non-empty. This outputs the
+ /// currently buffered data and resets the buffer to empty.
void flush_nonempty();
- /// copy_to_buffer - Copy data into the buffer. Size must not be
- /// greater than the number of unused bytes in the buffer.
+ /// Copy data into the buffer. Size must not be greater than the number of
+ /// unused bytes in the buffer.
void copy_to_buffer(const char *Ptr, size_t Size);
};
+/// An abstract base class for stream implementations that also support a
+/// pwrite operation. This is useful for code that can mostly stream out data,
+/// but needs to patch in a header that needs to know the output size.
+class raw_pwrite_stream : public raw_ostream {
+public:
+ explicit raw_pwrite_stream(bool Unbuffered = false)
+ : raw_ostream(Unbuffered) {}
+ virtual void pwrite(const char *Ptr, size_t Size, uint64_t Offset) = 0;
+};
+
//===----------------------------------------------------------------------===//
// File Output Streams
//===----------------------------------------------------------------------===//
-/// raw_fd_ostream - A raw_ostream that writes to a file descriptor.
+/// A raw_ostream that writes to a file descriptor.
///
-class raw_fd_ostream : public raw_ostream {
+class raw_fd_ostream : public raw_pwrite_stream {
int FD;
bool ShouldClose;
@@ -339,18 +343,19 @@ class raw_fd_ostream : public raw_ostream {
uint64_t pos;
- /// write_impl - See raw_ostream::write_impl.
+ bool SupportsSeeking;
+
+ /// See raw_ostream::write_impl.
void write_impl(const char *Ptr, size_t Size) override;
- /// current_pos - Return the current position within the stream, not
- /// counting the bytes currently in the buffer.
+ /// Return the current position within the stream, not counting the bytes
+ /// currently in the buffer.
uint64_t current_pos() const override { return pos; }
- /// preferred_buffer_size - Determine an efficient buffer size.
+ /// Determine an efficient buffer size.
size_t preferred_buffer_size() const override;
- /// error_detected - Set the flag indicating that an output error has
- /// been encountered.
+ /// Set the flag indicating that an output error has been encountered.
void error_detected() { Error = true; }
public:
@@ -367,22 +372,26 @@ public:
raw_fd_ostream(StringRef Filename, std::error_code &EC,
sys::fs::OpenFlags Flags);
- /// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If
- /// ShouldClose is true, this closes the file when the stream is destroyed.
+ /// FD is the file descriptor that this writes to. If ShouldClose is true,
+ /// this closes the file when the stream is destroyed.
raw_fd_ostream(int fd, bool shouldClose, bool unbuffered=false);
- ~raw_fd_ostream();
+ ~raw_fd_ostream() override;
- /// close - Manually flush the stream and close the file.
- /// Note that this does not call fsync.
+ /// Manually flush the stream and close the file. Note that this does not call
+ /// fsync.
void close();
- /// seek - Flushes the stream and repositions the underlying file descriptor
- /// position to the offset specified from the beginning of the file.
+ bool supportsSeeking() { return SupportsSeeking; }
+
+ /// Flushes the stream and repositions the underlying file descriptor position
+ /// to the offset specified from the beginning of the file.
uint64_t seek(uint64_t off);
- /// SetUseAtomicWrite - Set the stream to attempt to use atomic writes for
- /// individual output routines where possible.
+ void pwrite(const char *Ptr, size_t Size, uint64_t Offset) override;
+
+ /// Set the stream to attempt to use atomic writes for individual output
+ /// routines where possible.
///
/// Note that because raw_ostreams are typically buffered, this flag is only
/// sensible when used on unbuffered streams which will flush their output
@@ -401,18 +410,18 @@ public:
bool has_colors() const override;
- /// has_error - Return the value of the flag in this raw_fd_ostream indicating
- /// whether an output error has been encountered.
+ /// Return the value of the flag in this raw_fd_ostream indicating whether an
+ /// output error has been encountered.
/// This doesn't implicitly flush any pending output. Also, it doesn't
/// guarantee to detect all errors unless the stream has been closed.
bool has_error() const {
return Error;
}
- /// clear_error - Set the flag read by has_error() to false. If the error
- /// flag is set at the time when this raw_ostream's destructor is called,
- /// report_fatal_error is called to report the error. Use clear_error()
- /// after handling the error to avoid this behavior.
+ /// Set the flag read by has_error() to false. If the error flag is set at the
+ /// time when this raw_ostream's destructor is called, report_fatal_error is
+ /// called to report the error. Use clear_error() after handling the error to
+ /// avoid this behavior.
///
/// "Errors should never pass silently.
/// Unless explicitly silenced."
@@ -423,87 +432,105 @@ public:
}
};
-/// outs() - This returns a reference to a raw_ostream for standard output.
-/// Use it like: outs() << "foo" << "bar";
+/// This returns a reference to a raw_ostream for standard output. Use it like:
+/// outs() << "foo" << "bar";
raw_ostream &outs();
-/// errs() - This returns a reference to a raw_ostream for standard error.
-/// Use it like: errs() << "foo" << "bar";
+/// This returns a reference to a raw_ostream for standard error. Use it like:
+/// errs() << "foo" << "bar";
raw_ostream &errs();
-/// nulls() - This returns a reference to a raw_ostream which simply discards
-/// output.
+/// This returns a reference to a raw_ostream which simply discards output.
raw_ostream &nulls();
//===----------------------------------------------------------------------===//
// Output Stream Adaptors
//===----------------------------------------------------------------------===//
-/// raw_string_ostream - A raw_ostream that writes to an std::string. This is a
-/// simple adaptor class. This class does not encounter output errors.
+/// A raw_ostream that writes to an std::string. This is a simple adaptor
+/// class. This class does not encounter output errors.
class raw_string_ostream : public raw_ostream {
std::string &OS;
- /// write_impl - See raw_ostream::write_impl.
+ /// See raw_ostream::write_impl.
void write_impl(const char *Ptr, size_t Size) override;
- /// current_pos - Return the current position within the stream, not
- /// counting the bytes currently in the buffer.
+ /// Return the current position within the stream, not counting the bytes
+ /// currently in the buffer.
uint64_t current_pos() const override { return OS.size(); }
public:
explicit raw_string_ostream(std::string &O) : OS(O) {}
- ~raw_string_ostream();
+ ~raw_string_ostream() override;
- /// str - Flushes the stream contents to the target string and returns
- /// the string's reference.
+ /// Flushes the stream contents to the target string and returns the string's
+ /// reference.
std::string& str() {
flush();
return OS;
}
};
-/// raw_svector_ostream - A raw_ostream that writes to an SmallVector or
-/// SmallString. This is a simple adaptor class. This class does not
-/// encounter output errors.
-class raw_svector_ostream : public raw_ostream {
+/// A raw_ostream that writes to a SmallVector or SmallString. This is a
+/// simple adaptor class. This class does not encounter output errors.
+class raw_svector_ostream : public raw_pwrite_stream {
SmallVectorImpl<char> &OS;
- /// write_impl - See raw_ostream::write_impl.
+ /// See raw_ostream::write_impl.
void write_impl(const char *Ptr, size_t Size) override;
- /// current_pos - Return the current position within the stream, not
- /// counting the bytes currently in the buffer.
+ /// Return the current position within the stream, not counting the bytes
+ /// currently in the buffer.
uint64_t current_pos() const override;
+
+protected:
+ // Like the regular constructor, but doesn't call init.
+ explicit raw_svector_ostream(SmallVectorImpl<char> &O, unsigned);
+ void init();
+
public:
/// Construct a new raw_svector_ostream.
///
/// \param O The vector to write to; this should generally have at least 128
/// bytes free to avoid any extraneous memory overhead.
explicit raw_svector_ostream(SmallVectorImpl<char> &O);
- ~raw_svector_ostream();
+ ~raw_svector_ostream() override;
- /// resync - This is called when the SmallVector we're appending to is changed
- /// outside of the raw_svector_ostream's control. It is only safe to do this
- /// if the raw_svector_ostream has previously been flushed.
+ void pwrite(const char *Ptr, size_t Size, uint64_t Offset) override;
+
+ /// This is called when the SmallVector we're appending to is changed outside
+ /// of the raw_svector_ostream's control. It is only safe to do this if the
+ /// raw_svector_ostream has previously been flushed.
void resync();
- /// str - Flushes the stream contents to the target vector and return a
- /// StringRef for the vector contents.
+ /// Flushes the stream contents to the target vector and return a StringRef
+ /// for the vector contents.
StringRef str();
};
-/// raw_null_ostream - A raw_ostream that discards all output.
-class raw_null_ostream : public raw_ostream {
- /// write_impl - See raw_ostream::write_impl.
+/// A raw_ostream that discards all output.
+class raw_null_ostream : public raw_pwrite_stream {
+ /// See raw_ostream::write_impl.
void write_impl(const char *Ptr, size_t size) override;
- /// current_pos - Return the current position within the stream, not
- /// counting the bytes currently in the buffer.
+ /// Return the current position within the stream, not counting the bytes
+ /// currently in the buffer.
uint64_t current_pos() const override;
public:
explicit raw_null_ostream() {}
- ~raw_null_ostream();
+ ~raw_null_ostream() override;
+ void pwrite(const char *Ptr, size_t Size, uint64_t Offset) override;
+};
+
+class buffer_ostream : public raw_svector_ostream {
+ raw_ostream &OS;
+ SmallVector<char, 0> Buffer;
+
+public:
+ buffer_ostream(raw_ostream &OS) : raw_svector_ostream(Buffer, 0), OS(OS) {
+ init();
+ }
+ ~buffer_ostream() { OS << str(); }
};
} // end llvm namespace
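raw_pwrite_stream exists so emitters can stream most of their output and then patch a header once the final size is known. A sketch using raw_svector_ostream, which now derives from raw_pwrite_stream per the hunk above (the exact buffering behavior is an assumption, so flush() is called before patching):

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  SmallString<64> Buf;
  raw_svector_ostream OS(Buf);
  OS << "????";               // placeholder for a fixed-size header
  OS << " payload";
  OS.flush();                 // make sure everything has landed in Buf
  OS.pwrite("HDR!", 4, 0);    // patch the header in place at offset 0
  outs() << OS.str() << "\n"; // "HDR! payload"
}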
diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
index 5233493..e746e7d 100644
--- a/include/llvm/TableGen/Record.h
+++ b/include/llvm/TableGen/Record.h
@@ -1442,8 +1442,6 @@ public:
TheInit(O.TheInit), IsAnonymous(O.IsAnonymous),
ResolveFirst(O.ResolveFirst) { }
- ~Record() {}
-
static unsigned getNewUID() { return LastID++; }
unsigned getID() const { return ID; }
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 6c970d0..1ff8db8 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -207,6 +207,12 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
// The function should return 0 to select the default order defined by
// MemberList, 1 to select the first AltOrders entry and so on.
code AltOrderSelect = [{}];
+
+ // Specify allocation priority for register allocators using a greedy
+ // heuristic. Classes with higher priority values are assigned first. This is
+ // useful as it is sometimes beneficial to assign registers to highly
+ // constrained classes first. The value has to be in the range [0,63].
+ int AllocationPriority = 0;
}
// The memberList in a RegisterClass is a dag of set operations. TableGen
@@ -1015,6 +1021,11 @@ class AsmWriter {
// name.
string AsmWriterClassName = "InstPrinter";
+ // PassSubtarget - Determines whether MCSubtargetInfo should be passed to
+ // the various print methods.
+ // FIXME: Remove after all ports are updated.
+ int PassSubtarget = 0;
+
// Variant - AsmWriters can be of multiple different variants. Variants are
// used to support targets that need to emit assembly code in ways that are
// mostly the same for different targets, but have minor differences in
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 87aba9f..2a1ce04 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -48,6 +48,7 @@ class TargetSubtargetInfo;
class TargetTransformInfo;
class formatted_raw_ostream;
class raw_ostream;
+class raw_pwrite_stream;
class TargetLoweringObjectFile;
// The old pass manager infrastructure is hidden in a legacy namespace now.
@@ -58,9 +59,9 @@ using legacy::PassManagerBase;
//===----------------------------------------------------------------------===//
///
-/// TargetMachine - Primary interface to the complete machine description for
-/// the target machine. All target-specific information should be accessible
-/// through this interface.
+/// Primary interface to the complete machine description for the target
+/// machine. All target-specific information should be accessible through this
+/// interface.
///
class TargetMachine {
TargetMachine(const TargetMachine &) = delete;
@@ -70,25 +71,25 @@ protected: // Can only create subclasses.
StringRef TargetTriple, StringRef CPU, StringRef FS,
const TargetOptions &Options);
- /// TheTarget - The Target that this machine was created for.
+ /// The Target that this machine was created for.
const Target &TheTarget;
- /// DataLayout - For ABI type size and alignment.
+ /// For ABI type size and alignment.
const DataLayout DL;
- /// TargetTriple, TargetCPU, TargetFS - Triple string, CPU name, and target
- /// feature strings the TargetMachine instance is created with.
+ /// Triple string, CPU name, and target feature strings the TargetMachine
+ /// instance is created with.
std::string TargetTriple;
std::string TargetCPU;
std::string TargetFS;
- /// CodeGenInfo - Low level target information such as relocation model.
- /// Non-const to allow resetting optimization level per-function.
+ /// Low level target information such as relocation model. Non-const to
+ /// allow resetting optimization level per-function.
MCCodeGenInfo *CodeGenInfo;
- /// AsmInfo - Contains target specific asm information.
- ///
+ /// Contains target specific asm information.
const MCAsmInfo *AsmInfo;
+
const MCRegisterInfo *MRI;
const MCInstrInfo *MII;
const MCSubtargetInfo *STI;
@@ -106,8 +107,8 @@ public:
StringRef getTargetCPU() const { return TargetCPU; }
StringRef getTargetFeatureString() const { return TargetFS; }
- /// getSubtargetImpl - virtual method implemented by subclasses that returns
- /// a reference to that target's TargetSubtargetInfo-derived member variable.
+ /// Virtual method implemented by subclasses that returns a reference to that
+ /// target's TargetSubtargetInfo-derived member variable.
virtual const TargetSubtargetInfo *getSubtargetImpl(const Function &) const {
return nullptr;
}
@@ -115,15 +116,15 @@ public:
return nullptr;
}
- /// getSubtarget - This method returns a pointer to the specified type of
+ /// This method returns a pointer to the specified type of
/// TargetSubtargetInfo. In debug builds, it verifies that the object being
/// returned is of the correct type.
template <typename STC> const STC &getSubtarget(const Function &F) const {
return *static_cast<const STC*>(getSubtargetImpl(F));
}
- /// getDataLayout - This method returns a pointer to the DataLayout for
- /// the target. It should be unchanging for every subtarget.
+ /// This method returns a pointer to the DataLayout for the target. It should
+ /// be unchanging for every subtarget.
const DataLayout *getDataLayout() const { return &DL; }
/// \brief Reset the target options based on the function's attributes.
@@ -131,16 +132,14 @@ public:
// from TargetMachine.
void resetTargetOptions(const Function &F) const;
- /// getMCAsmInfo - Return target specific asm information.
- ///
+ /// Return target specific asm information.
const MCAsmInfo *getMCAsmInfo() const { return AsmInfo; }
+
const MCRegisterInfo *getMCRegisterInfo() const { return MRI; }
const MCInstrInfo *getMCInstrInfo() const { return MII; }
const MCSubtargetInfo *getMCSubtargetInfo() const { return STI; }
- /// getIntrinsicInfo - If intrinsic information is available, return it. If
- /// not, return null.
- ///
+ /// If intrinsic information is available, return it. If not, return null.
virtual const TargetIntrinsicInfo *getIntrinsicInfo() const {
return nullptr;
}
@@ -148,20 +147,18 @@ public:
bool requiresStructuredCFG() const { return RequireStructuredCFG; }
void setRequiresStructuredCFG(bool Value) { RequireStructuredCFG = Value; }
- /// getRelocationModel - Returns the code generation relocation model. The
- /// choices are static, PIC, and dynamic-no-pic, and target default.
+ /// Returns the code generation relocation model. The choices are static, PIC,
+ /// and dynamic-no-pic, and target default.
Reloc::Model getRelocationModel() const;
- /// getCodeModel - Returns the code model. The choices are small, kernel,
- /// medium, large, and target default.
+ /// Returns the code model. The choices are small, kernel, medium, large, and
+ /// target default.
CodeModel::Model getCodeModel() const;
- /// getTLSModel - Returns the TLS model which should be used for the given
- /// global variable.
+ /// Returns the TLS model which should be used for the given global variable.
TLSModel::Model getTLSModel(const GlobalValue *GV) const;
- /// getOptLevel - Returns the optimization level: None, Less,
- /// Default, or Aggressive.
+ /// Returns the optimization level: None, Less, Default, or Aggressive.
CodeGenOpt::Level getOptLevel() const;
/// \brief Overrides the optimization level.
@@ -198,21 +195,20 @@ public:
/// uses this to answer queries about the IR.
virtual TargetIRAnalysis getTargetIRAnalysis();
- /// CodeGenFileType - These enums are meant to be passed into
- /// addPassesToEmitFile to indicate what type of file to emit, and returned by
- /// it to indicate what type of file could actually be made.
+ /// These enums are meant to be passed into addPassesToEmitFile to indicate
+ /// what type of file to emit, and returned by it to indicate what type of
+ /// file could actually be made.
enum CodeGenFileType {
CGFT_AssemblyFile,
CGFT_ObjectFile,
CGFT_Null // Do not emit any output.
};
- /// addPassesToEmitFile - Add passes to the specified pass manager to get the
- /// specified file emitted. Typically this will involve several steps of code
- /// generation. This method should return true if emission of this file type
- /// is not supported, or false on success.
- virtual bool addPassesToEmitFile(PassManagerBase &,
- formatted_raw_ostream &,
+ /// Add passes to the specified pass manager to get the specified file
+ /// emitted. Typically this will involve several steps of code generation.
+ /// This method should return true if emission of this file type is not
+ /// supported, or false on success.
+ virtual bool addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &,
CodeGenFileType,
bool /*DisableVerify*/ = true,
AnalysisID /*StartAfter*/ = nullptr,
@@ -220,14 +216,13 @@ public:
return true;
}
- /// addPassesToEmitMC - Add passes to the specified pass manager to get
- /// machine code emitted with the MCJIT. This method returns true if machine
- /// code is not supported. It fills the MCContext Ctx pointer which can be
- /// used to build custom MCStreamer.
+ /// Add passes to the specified pass manager to get machine code emitted with
+ /// the MCJIT. This method returns true if machine code is not supported. It
+ /// fills the MCContext Ctx pointer which can be used to build custom
+ /// MCStreamer.
///
- virtual bool addPassesToEmitMC(PassManagerBase &,
- MCContext *&,
- raw_ostream &,
+ virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&,
+ raw_pwrite_stream &,
bool /*DisableVerify*/ = true) {
return true;
}
@@ -237,8 +232,8 @@ public:
MCSymbol *getSymbol(const GlobalValue *GV, Mangler &Mang) const;
};
-/// LLVMTargetMachine - This class describes a target machine that is
-/// implemented with the LLVM target-independent code generator.
+/// This class describes a target machine that is implemented with the LLVM
+/// target-independent code generator.
///
class LLVMTargetMachine : public TargetMachine {
protected: // Can only create subclasses.
@@ -255,25 +250,24 @@ public:
/// generator to answer queries about the IR.
TargetIRAnalysis getTargetIRAnalysis() override;
- /// createPassConfig - Create a pass configuration object to be used by
- /// addPassToEmitX methods for generating a pipeline of CodeGen passes.
+ /// Create a pass configuration object to be used by addPassToEmitX methods
+ /// for generating a pipeline of CodeGen passes.
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
- /// addPassesToEmitFile - Add passes to the specified pass manager to get the
- /// specified file emitted. Typically this will involve several steps of code
- /// generation.
- bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out,
+ /// Add passes to the specified pass manager to get the specified file
+ /// emitted. Typically this will involve several steps of code generation.
+ bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
CodeGenFileType FileType, bool DisableVerify = true,
AnalysisID StartAfter = nullptr,
AnalysisID StopAfter = nullptr) override;
- /// addPassesToEmitMC - Add passes to the specified pass manager to get
- /// machine code emitted with the MCJIT. This method returns true if machine
- /// code is not supported. It fills the MCContext Ctx pointer which can be
- /// used to build custom MCStreamer.
- ///
+ /// Add passes to the specified pass manager to get machine code emitted with
+ /// the MCJIT. This method returns true if machine code is not supported. It
+ /// fills the MCContext Ctx pointer which can be used to build custom
+ /// MCStreamer.
bool addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
- raw_ostream &OS, bool DisableVerify = true) override;
+ raw_pwrite_stream &OS,
+ bool DisableVerify = true) override;
};
} // End llvm namespace
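Because addPassesToEmitFile now takes a raw_pwrite_stream, a plain raw_fd_ostream can be handed in directly, with no formatted_raw_ostream wrapper. A compile-level sketch of the new call shape (emitObject and its arguments are illustrative; the pass manager still has to be run on the module afterwards):

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <system_error>
using namespace llvm;

bool emitObject(TargetMachine &TM, legacy::PassManagerBase &PM, StringRef Path) {
  std::error_code EC;
  raw_fd_ostream Out(Path, EC, sys::fs::F_None); // raw_fd_ostream is a raw_pwrite_stream
  if (EC)
    return true; // mirror addPassesToEmitFile's "true means failure" convention
  return TM.addPassesToEmitFile(PM, Out, TargetMachine::CGFT_ObjectFile);
}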
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index f447fd6..d41ca99 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -212,9 +212,9 @@ namespace llvm {
/// FloatABIType - This setting is set when the -float-abi=xxx option is specified
/// on the command line. This setting may either be Default, Soft, or Hard.
/// Default selects the target's default behavior. Soft selects the ABI for
- /// UseSoftFloat, but does not indicate that FP hardware may not be used.
- /// Such a combination is unfortunately popular (e.g. arm-apple-darwin).
- /// Hard presumes that the normal FP ABI is used.
+ /// software floating point, but does not indicate that FP hardware may not
+ /// be used. Such a combination is unfortunately popular (e.g.
+ /// arm-apple-darwin). Hard presumes that the normal FP ABI is used.
FloatABI::ABIType FloatABIType;
/// AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index 4184052..121b8a2 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -46,6 +46,9 @@ public:
const uint32_t *SubClassMask;
const uint16_t *SuperRegIndices;
const unsigned LaneMask;
+ /// Classes with a higher priority value are assigned first by register
+ /// allocators using a greedy heuristic. The value is in the range [0,63].
+ const uint8_t AllocationPriority;
/// Whether the class supports two (or more) disjunct subregister indices.
const bool HasDisjunctSubRegs;
const sc_iterator SuperClasses;
diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h
index bb5409b6..0f42790 100644
--- a/include/llvm/Target/TargetSubtargetInfo.h
+++ b/include/llvm/Target/TargetSubtargetInfo.h
@@ -42,15 +42,17 @@ template <typename T> class SmallVectorImpl;
/// be exposed through a TargetSubtargetInfo-derived class.
///
class TargetSubtargetInfo : public MCSubtargetInfo {
- TargetSubtargetInfo(const TargetSubtargetInfo&) = delete;
- void operator=(const TargetSubtargetInfo&) = delete;
+ TargetSubtargetInfo(const TargetSubtargetInfo &) = delete;
+ void operator=(const TargetSubtargetInfo &) = delete;
+
protected: // Can only create subclasses...
TargetSubtargetInfo();
+
public:
// AntiDepBreakMode - Type of anti-dependence breaking that should
// be performed before post-RA scheduling.
typedef enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL } AntiDepBreakMode;
- typedef SmallVectorImpl<const TargetRegisterClass*> RegClassVector;
+ typedef SmallVectorImpl<const TargetRegisterClass *> RegClassVector;
virtual ~TargetSubtargetInfo();
@@ -89,8 +91,9 @@ public:
/// MCSchedClassDesc with the isVariant property. This may return the ID of
/// another variant SchedClass, but repeated invocation must quickly terminate
/// in a nonvariant SchedClass.
- virtual unsigned resolveSchedClass(unsigned SchedClass, const MachineInstr *MI,
- const TargetSchedModel* SchedModel) const {
+ virtual unsigned resolveSchedClass(unsigned SchedClass,
+ const MachineInstr *MI,
+ const TargetSchedModel *SchedModel) const {
return 0;
}
@@ -128,20 +131,16 @@ public:
/// scheduling heuristics (no custom MachineSchedStrategy) to make
/// changes to the generic scheduling policy.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy,
- MachineInstr *begin,
- MachineInstr *end,
+ MachineInstr *begin, MachineInstr *end,
unsigned NumRegionInstrs) const {}
// \brief Perform target specific adjustments to the latency of a schedule
// dependency.
- virtual void adjustSchedDependency(SUnit *def, SUnit *use,
- SDep& dep) const { }
+ virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const {}
// For use with PostRAScheduling: get the anti-dependence breaking that should
// be performed before post-RA scheduling.
- virtual AntiDepBreakMode getAntiDepBreakMode() const {
- return ANTIDEP_NONE;
- }
+ virtual AntiDepBreakMode getAntiDepBreakMode() const { return ANTIDEP_NONE; }
// For use with PostRAScheduling: in CriticalPathRCs, return any register
// classes that should only be considered for anti-dependence breaking if they
@@ -177,9 +176,7 @@ public:
}
/// Enable tracking of subregister liveness in register allocator.
- virtual bool enableSubRegLiveness() const {
- return false;
- }
+ virtual bool enableSubRegLiveness() const { return false; }
};
} // End llvm namespace
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 2d754d0..925ebda 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -427,6 +427,11 @@ createSeparateConstOffsetFromGEPPass(const TargetMachine *TM = nullptr,
//
BasicBlockPass *createLoadCombinePass();
+//===----------------------------------------------------------------------===//
+//
+// StraightLineStrengthReduce - This pass strength-reduces certain instruction
+// patterns in straight-line code.
+//
FunctionPass *createStraightLineStrengthReducePass();
//===----------------------------------------------------------------------===//
@@ -446,6 +451,18 @@ ModulePass *createPlaceSafepointsPass();
//
FunctionPass *createRewriteStatepointsForGCPass();
+//===----------------------------------------------------------------------===//
+//
+// Float2Int - Demote floats to ints where possible.
+//
+FunctionPass *createFloat2IntPass();
+
+//===----------------------------------------------------------------------===//
+//
+// NaryReassociate - Simplify n-ary operations by reassociation.
+//
+FunctionPass *createNaryReassociatePass();
+
} // End llvm namespace
#endif
diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h
index 2eb2555..e8f3172 100644
--- a/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/include/llvm/Transforms/Utils/LoopUtils.h
@@ -122,6 +122,10 @@ bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock*> &,
/// Updates safety information in LICMSafetyInfo argument.
void computeLICMSafetyInfo(LICMSafetyInfo *, Loop *);
+/// \brief Checks if the given PHINode in a loop header is an induction
+/// variable. Returns true if this is an induction PHI and sets the step value
+/// through the ConstantInt out-parameter.
+bool isInductionPHI(PHINode *, ScalarEvolution *, ConstantInt *&);
}
#endif
diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h
index 16904f1..907563e 100644
--- a/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -20,6 +20,7 @@ class Module;
class Function;
class GlobalValue;
class GlobalVariable;
+class Constant;
template <class PtrType> class SmallPtrSetImpl;
/// Append F to the list of global ctors of module M with the given Priority.
@@ -36,6 +37,12 @@ void appendToGlobalDtors(Module &M, Function *F, int Priority);
GlobalVariable *collectUsedGlobalVariables(Module &M,
SmallPtrSetImpl<GlobalValue *> &Set,
bool CompilerUsed);
+
+// Validate the result of Module::getOrInsertFunction called for an interface
+// function of the given sanitizer. If the instrumented module defines a function
+// with the same name, their prototypes must match, otherwise
+// getOrInsertFunction returns a bitcast.
+Function *checkSanitizerInterfaceFunction(Constant *FuncOrBitcast);
} // End llvm namespace
#endif // LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
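For illustration only, a plausible shape of such a validation helper (this is an editor-added sketch, not the committed body of checkSanitizerInterfaceFunction): getOrInsertFunction yields a Function when the prototype matches and a bitcast constant expression when a same-named function with a different prototype already exists, which instrumentation passes treat as a fatal configuration error.

#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"

static llvm::Function *checkSanitizerInterfaceFunctionSketch(llvm::Constant *FuncOrBitcast) {
  // A matching prototype gives us the Function directly.
  if (auto *F = llvm::dyn_cast<llvm::Function>(FuncOrBitcast))
    return F;
  // Anything else is a bitcast of a mismatched definition in the module.
  llvm::report_fatal_error("sanitizer interface function redefined with a different prototype");
}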
diff --git a/include/llvm/Transforms/Utils/SymbolRewriter.h b/include/llvm/Transforms/Utils/SymbolRewriter.h
index 48dea04..5ccee98 100644
--- a/include/llvm/Transforms/Utils/SymbolRewriter.h
+++ b/include/llvm/Transforms/Utils/SymbolRewriter.h
@@ -90,9 +90,6 @@ typedef iplist<RewriteDescriptor> RewriteDescriptorList;
class RewriteMapParser {
public:
- RewriteMapParser() {}
- ~RewriteMapParser() {}
-
bool parse(const std::string &MapFile, RewriteDescriptorList *Descriptors);
private:
diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h
index 04d9ee1..7f2cf8d 100644
--- a/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -28,11 +28,13 @@ class MDNode;
class Pass;
bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime,
- unsigned TripMultiple, LoopInfo *LI, Pass *PP,
- LPPassManager *LPM, AssumptionCache *AC);
+ bool AllowExpensiveTripCount, unsigned TripMultiple,
+ LoopInfo *LI, Pass *PP, LPPassManager *LPM,
+ AssumptionCache *AC);
-bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
- LPPassManager* LPM);
+bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
+ bool AllowExpensiveTripCount, LoopInfo *LI,
+ LPPassManager *LPM);
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
}
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 0b0fd50..43db176 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -82,6 +82,23 @@ void AliasAnalysis::addEscapingUse(Use &U) {
AA->addEscapingUse(U);
}
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+ // We may have two calls
+ if (auto CS = ImmutableCallSite(I)) {
+ // Check if the two calls modify the same memory
+ return getModRefInfo(Call, CS);
+ } else {
+ // Otherwise, check if the call modifies or references the
+ // location this memory access defines. The best we can say
+ // is that if the call references what this instruction
+ // defines, it must be clobbered by this location.
+ const AliasAnalysis::Location DefLoc = AA->getLocation(I);
+ if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef)
+ return AliasAnalysis::ModRef;
+ }
+ return AliasAnalysis::NoModRef;
+}
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
@@ -330,7 +347,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
// If the load address doesn't alias the given address, it doesn't read
// or write the specified memory.
- if (!alias(getLocation(L), Loc))
+ if (Loc.Ptr && !alias(getLocation(L), Loc))
return NoModRef;
// Otherwise, a load just reads.
@@ -343,15 +360,18 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
if (!S->isUnordered())
return ModRef;
- // If the store address cannot alias the pointer in question, then the
- // specified memory cannot be modified by the store.
- if (!alias(getLocation(S), Loc))
- return NoModRef;
+ if (Loc.Ptr) {
+ // If the store address cannot alias the pointer in question, then the
+ // specified memory cannot be modified by the store.
+ if (!alias(getLocation(S), Loc))
+ return NoModRef;
- // If the pointer is a pointer to constant memory, then it could not have been
- // modified by this store.
- if (pointsToConstantMemory(Loc))
- return NoModRef;
+ // If the pointer is a pointer to constant memory, then it could not have
+ // been modified by this store.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+
+ }
// Otherwise, a store just writes.
return Mod;
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 5865259..a1bfba1 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -44,7 +44,7 @@ namespace {
errs() << " " << Val << " " << Desc << " responses ("
<< Val*100/Sum << "%)\n";
}
- ~AliasAnalysisCounter() {
+ ~AliasAnalysisCounter() override {
unsigned AASum = No+May+Partial+Must;
unsigned MRSum = NoMR+JustRef+JustMod+MR;
if (AASum + MRSum) { // Print a report if any counted queries occurred...
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index fe4bd4c..273eacc 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -158,7 +158,7 @@ bool AAEval::runOnFunction(Function &F) {
if (EvalAAMD && isa<StoreInst>(&*I))
Stores.insert(&*I);
Instruction &Inst = *I;
- if (CallSite CS = cast<Value>(&Inst)) {
+ if (auto CS = CallSite(&Inst)) {
Value *Callee = CS.getCalledValue();
// Skip actual functions for direct function calls.
if (!isa<Function>(Callee) && isInterestingPointer(Callee))
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 45442b0..4c79b9f 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -187,7 +187,7 @@ bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
return false;
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
- CallSite C1 = getUnknownInst(i), C2 = Inst;
+ CallSite C1(getUnknownInst(i)), C2(Inst);
if (!C1 || !C2 ||
AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef)
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 4549c1e..842ff0a 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -37,6 +37,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFLAliasAnalysisPass(Registry);
initializeDependenceAnalysisPass(Registry);
initializeDelinearizationPass(Registry);
+ initializeDivergenceAnalysisPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
index 277956c..94a1d2e 100644
--- a/lib/Analysis/Android.mk
+++ b/lib/Analysis/Android.mk
@@ -22,6 +22,7 @@ analysis_SRC_FILES := \
CostModel.cpp \
Delinearization.cpp \
DependenceAnalysis.cpp \
+ DivergenceAnalysis.cpp \
DomPrinter.cpp \
DominanceFrontier.cpp \
IVUsers.cpp \
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index be2282f..2767e41 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -932,14 +932,14 @@ aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
// Also, check that they all index through arrays.
for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) {
if (!isa<ArrayType>(GetElementPtrInst::getIndexedType(
- GEP1->getPointerOperandType(), IntermediateIndices)))
+ GEP1->getSourceElementType(), IntermediateIndices)))
return AliasAnalysis::MayAlias;
IntermediateIndices.push_back(GEP1->getOperand(i + 1));
}
StructType *LastIndexedStruct =
dyn_cast<StructType>(GetElementPtrInst::getIndexedType(
- GEP1->getPointerOperandType(), IntermediateIndices));
+ GEP1->getSourceElementType(), IntermediateIndices));
if (!LastIndexedStruct)
return AliasAnalysis::MayAlias;
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 37f2fae..3d819eb 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -85,7 +85,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
std::string Result;
raw_string_ostream OS(Result);
- OS << Node->getName().str() << ":";
+ OS << Node->getName() << ":";
switch (ViewBlockFreqPropagationDAG) {
case GVDT_Fraction:
Graph->printBlockFreq(OS, Node);
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 278073c..456cee1 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -331,32 +331,35 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
return true;
}
-/// \brief Get the maximum allowed loop scale.
-///
-/// Gives the maximum number of estimated iterations allowed for a loop. Very
-/// large numbers cause problems downstream (even within 64-bits).
-static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); }
-
/// \brief Compute the loop scale for a loop.
void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
// Compute loop scale.
DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
+ // Infinite loops need special handling. If we give the back edge an infinite
+ // mass, they may saturate all the other scales in the function down to 1,
+ // making all the other region temperatures look exactly the same. Choose an
+ // arbitrary scale to avoid these issues.
+ //
+ // FIXME: An alternate way would be to select a symbolic scale which is later
+ // replaced to be the maximum of all computed scales plus 1. This would
+ // appropriately describe the loop as having a large scale, without skewing
+ // the final frequency computation.
+ const Scaled64 InfiniteLoopScale(1, 12);
+
// LoopScale == 1 / ExitMass
// ExitMass == HeadMass - BackedgeMass
BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
- // Block scale stores the inverse of the scale.
- Loop.Scale = ExitMass.toScaled().inverse();
+ // Block scale stores the inverse of the scale. If this is an infinite loop,
+ // its exit mass will be zero. In this case, use an arbitrary scale for the
+ // loop scale.
+ Loop.Scale =
+ ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
<< " - " << Loop.BackedgeMass << ")\n"
<< " - scale = " << Loop.Scale << "\n");
-
- if (Loop.Scale > getMaxLoopScale()) {
- Loop.Scale = getMaxLoopScale();
- DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
- }
}
/// \brief Package up a loop.
@@ -424,15 +427,24 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
const Scaled64 &Min, const Scaled64 &Max) {
// Scale the Factor to a size that creates integers. Ideally, integers would
// be scaled so that Max == UINT64_MAX so that they can be best
- // differentiated. However, the register allocator currently deals poorly
- // with large numbers. Instead, push Min up a little from 1 to give some
- // room to differentiate small, unequal numbers.
- //
- // TODO: fix issues downstream so that ScalingFactor can be
- // Scaled64(1,64)/Max.
- Scaled64 ScalingFactor = Min.inverse();
- if ((Max / Min).lg() < 60)
+ // differentiated. However, in the presence of large frequency values, small
+ // frequencies are scaled down to 1, making it impossible to differentiate
+ // small, unequal numbers. When the spread between Min and Max frequencies
+ // fits well within MaxBits, we make the scale be at least 8.
+ const unsigned MaxBits = 64;
+ const unsigned SpreadBits = (Max / Min).lg();
+ Scaled64 ScalingFactor;
+ if (SpreadBits <= MaxBits - 3) {
+ // If the values are small enough, make the scaling factor at least 8 to
+ // allow distinguishing small values.
+ ScalingFactor = Min.inverse();
ScalingFactor <<= 3;
+ } else {
+ // If the values need more than MaxBits to be represented, saturate small
+ // frequency values down to 1 by using a scaling factor that benefits large
+ // frequency values.
+ ScalingFactor = Scaled64(1, MaxBits) / Max;
+ }
// Translate the floats to integers.
DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
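A worked example of the scaling-factor choice made above, added for clarity. Scaled64 arithmetic is approximated here with double, and the Min/Max values are invented; only the 64-bit and 3-bit thresholds mirror the code.

#include <cmath>
#include <cstdio>

int main() {
  const unsigned MaxBits = 64;
  double Min = 0.25, Max = 1024.0;                    // narrow-spread case
  unsigned SpreadBits = (unsigned)std::log2(Max / Min);  // 12 bits of spread
  double ScalingFactor;
  if (SpreadBits <= MaxBits - 3)
    ScalingFactor = (1.0 / Min) * 8.0;                // Min.inverse() << 3
  else
    ScalingFactor = std::ldexp(1.0, MaxBits) / Max;   // Scaled64(1, MaxBits) / Max
  // Here Min maps to 8 and Max to 32768, so small unequal frequencies near Min
  // remain distinguishable instead of collapsing to 1.
  std::printf("min -> %.0f, max -> %.0f\n", Min * ScalingFactor, Max * ScalingFactor);
  return 0;
}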
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 14800f4..8799a71 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -379,6 +379,14 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
if (!CV)
return false;
+ // If the LHS is the result of AND'ing a value with a single bit bitmask,
+ // we don't have information about probabilities.
+ if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
+ if (LHS->getOpcode() == Instruction::And)
+ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1)))
+ if (AndRHS->getUniqueInteger().isPowerOf2())
+ return false;
+
bool isProb;
if (CV->isZero()) {
switch (CI->getPredicate()) {
@@ -499,25 +507,23 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
- for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()),
- E = po_end(&F.getEntryBlock());
- I != E; ++I) {
- DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n");
- if (calcUnreachableHeuristics(*I))
+ for (auto BB : post_order(&F.getEntryBlock())) {
+ DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
+ if (calcUnreachableHeuristics(BB))
continue;
- if (calcMetadataWeights(*I))
+ if (calcMetadataWeights(BB))
continue;
- if (calcColdCallHeuristics(*I))
+ if (calcColdCallHeuristics(BB))
continue;
- if (calcLoopBranchHeuristics(*I))
+ if (calcLoopBranchHeuristics(BB))
continue;
- if (calcPointerHeuristics(*I))
+ if (calcPointerHeuristics(BB))
continue;
- if (calcZeroHeuristics(*I))
+ if (calcZeroHeuristics(BB))
continue;
- if (calcFloatingPointHeuristics(*I))
+ if (calcFloatingPointHeuristics(BB))
continue;
- calcInvokeHeuristics(*I);
+ calcInvokeHeuristics(BB);
}
PostDominatedByUnreachable.clear();
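An editor-added source-level illustration of the new bail-out in calcZeroHeuristics: testing a single bit of a flags word carries no zero/non-zero bias, unlike comparing a whole value against zero. The example is hypothetical and not taken from the patch.

#include <cstdio>

void dispatch(unsigned Flags) {
  // Lowers to roughly: %and = and i32 %Flags, 4 ; %cmp = icmp eq i32 %and, 0
  // A single-bit mask gives no probability information, so the zero heuristic
  // should not claim either side of this branch is unlikely.
  if (Flags & 0x4)
    std::puts("feature enabled");
  else
    std::puts("feature disabled");
}

int main() { dispatch(0x4); dispatch(0); return 0; }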
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 89787f82..c86f1f5 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -77,7 +77,7 @@ namespace {
}
bool runOnFunction(Function &F) override {
- std::string Filename = "cfg." + F.getName().str() + ".dot";
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
@@ -111,7 +111,7 @@ namespace {
}
bool runOnFunction(Function &F) override {
- std::string Filename = "cfg." + F.getName().str() + ".dot";
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp
index 53d748d..3147992 100644
--- a/lib/Analysis/CFLAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAliasAnalysis.cpp
@@ -161,7 +161,7 @@ struct FunctionHandle : public CallbackVH {
assert(CFLAA != nullptr);
}
- virtual ~FunctionHandle() {}
+ ~FunctionHandle() override {}
void deleted() override { removeSelfFromCache(); }
void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
@@ -189,7 +189,7 @@ public:
initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
- virtual ~CFLAliasAnalysis() {}
+ ~CFLAliasAnalysis() override {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AliasAnalysis::getAnalysisUsage(AU);
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index ae40321..1335a6d 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_library(LLVMAnalysis
ConstantFolding.cpp
Delinearization.cpp
DependenceAnalysis.cpp
+ DivergenceAnalysis.cpp
DomPrinter.cpp
DominanceFrontier.cpp
IVUsers.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 995465d..a85e813 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -671,8 +671,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly casted by the getelementptr.
-static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy,
- const DataLayout &DL,
+static Constant *CastGEPIndices(Type *SrcTy, ArrayRef<Constant *> Ops,
+ Type *ResultTy, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
Type *IntPtrTy = DL.getIntPtrType(ResultTy);
@@ -681,8 +681,9 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy,
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
!isa<StructType>(GetElementPtrInst::getIndexedType(
- Ops[0]->getType(),
- Ops.slice(1, i - 1)))) &&
+ cast<PointerType>(Ops[0]->getType()->getScalarType())
+ ->getElementType(),
+ Ops.slice(1, i - 1)))) &&
Ops[i]->getType() != IntPtrTy) {
Any = true;
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
@@ -697,7 +698,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy,
if (!Any)
return nullptr;
- Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
+ Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ops[0], NewIdxs);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
C = Folded;
@@ -723,7 +724,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) {
}
/// If we can symbolically evaluate the GEP constant expression, do so.
-static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
+static Constant *SymbolicallyEvaluateGEP(Type *SrcTy, ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
@@ -865,7 +866,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
return nullptr;
// Create a GEP.
- Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
+ Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ptr, NewIdxs);
assert(C->getType()->getPointerElementType() == Ty &&
"Computed GetElementPtr has unexpected type!");
@@ -1085,13 +1086,15 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
- case Instruction::GetElementPtr:
- if (Constant *C = CastGEPIndices(Ops, DestTy, DL, TLI))
+ case Instruction::GetElementPtr: {
+ Type *SrcTy = nullptr;
+ if (Constant *C = CastGEPIndices(SrcTy, Ops, DestTy, DL, TLI))
return C;
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, DL, TLI))
+ if (Constant *C = SymbolicallyEvaluateGEP(SrcTy, Ops, DestTy, DL, TLI))
return C;
- return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
+ return ConstantExpr::getGetElementPtr(SrcTy, Ops[0], Ops.slice(1));
+ }
}
}
diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp
new file mode 100644
index 0000000..e5ee295
--- /dev/null
+++ b/lib/Analysis/DivergenceAnalysis.cpp
@@ -0,0 +1,337 @@
+//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines divergence analysis which determines whether a branch in a
+// GPU program is divergent. It can help branch optimizations such as jump
+// threading and loop unswitching to make better decisions.
+//
+// GPU programs typically use the SIMD execution model, where multiple threads
+// in the same execution group have to execute in lock-step. Therefore, if the
+// code contains divergent branches (i.e., threads in a group do not agree on
+// which path of the branch to take), the group of threads has to execute all
+// the paths from that branch with different subsets of threads enabled until
+// they converge at the immediately post-dominating BB of the paths.
+//
+// Due to this execution model, some optimizations such as jump
+// threading and loop unswitching can be unfortunately harmful when performed on
+// divergent branches. Therefore, an analysis that computes which branches in a
+// GPU program are divergent can help the compiler to selectively run these
+// optimizations.
+//
+// This file defines divergence analysis which computes a conservative but
+// non-trivial approximation of all divergent branches in a GPU program. It
+// partially implements the approach described in
+//
+// Divergence Analysis
+// Sampaio, Souza, Collange, Pereira
+// TOPLAS '13
+//
+// The divergence analysis identifies the sources of divergence (e.g., special
+// variables that hold the thread ID), and recursively marks variables that are
+// data or sync dependent on a source of divergence as divergent.
+//
+// While data dependency is a well-known concept, the notion of sync dependency
+// is worth more explanation. Sync dependence characterizes the control flow
+// aspect of the propagation of branch divergence. For example,
+//
+// %cond = icmp slt i32 %tid, 10
+// br i1 %cond, label %then, label %else
+// then:
+// br label %merge
+// else:
+// br label %merge
+// merge:
+// %a = phi i32 [ 0, %then ], [ 1, %else ]
+//
+// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
+// because %tid is not on its use-def chains, %a is sync dependent on %tid
+// because the branch "br i1 %cond" depends on %tid and affects which value %a
+// is assigned to.
+//
+// The current implementation has the following limitations:
+// 1. intra-procedural. It conservatively considers the arguments of a
+// non-kernel-entry function and the return value of a function call as
+// divergent.
+// 2. memory as black box. It conservatively considers values loaded from
+// a generic or local address as divergent. This can be improved by leveraging
+// pointer analysis.
+//===----------------------------------------------------------------------===//
+
+#include <vector>
+#include "llvm/IR/Dominators.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "divergence"
+
+namespace {
+class DivergenceAnalysis : public FunctionPass {
+public:
+ static char ID;
+
+ DivergenceAnalysis() : FunctionPass(ID) {
+ initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ AU.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ // Print all divergent branches in the function.
+ void print(raw_ostream &OS, const Module *) const override;
+
+ // Returns true if V is divergent.
+ bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
+ // Returns true if V is uniform/non-divergent.
+ bool isUniform(const Value *V) const { return !isDivergent(V); }
+
+private:
+ // Stores all divergent values.
+ DenseSet<const Value *> DivergentValues;
+};
+} // End of anonymous namespace
+
+// Register this pass.
+char DivergenceAnalysis::ID = 0;
+INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+
+namespace {
+
+class DivergencePropagator {
+public:
+ DivergencePropagator(Function &F, TargetTransformInfo &TTI,
+ DominatorTree &DT, PostDominatorTree &PDT,
+ DenseSet<const Value *> &DV)
+ : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
+ void populateWithSourcesOfDivergence();
+ void propagate();
+
+private:
+ // A helper function that explores data dependents of V.
+ void exploreDataDependency(Value *V);
+ // A helper function that explores sync dependents of TI.
+ void exploreSyncDependency(TerminatorInst *TI);
+ // Computes the influence region from Start to End. This region includes all
+ // basic blocks on any path from Start to End.
+ void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion);
+ // Finds all users of I that are outside the influence region, and adds these
+ // users to Worklist.
+ void findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
+
+ Function &F;
+ TargetTransformInfo &TTI;
+ DominatorTree &DT;
+ PostDominatorTree &PDT;
+ std::vector<Value *> Worklist; // Stack for DFS.
+ DenseSet<const Value *> &DV; // Stores all divergent values.
+};
+
+void DivergencePropagator::populateWithSourcesOfDivergence() {
+ Worklist.clear();
+ DV.clear();
+ for (auto &I : inst_range(F)) {
+ if (TTI.isSourceOfDivergence(&I)) {
+ Worklist.push_back(&I);
+ DV.insert(&I);
+ }
+ }
+ for (auto &Arg : F.args()) {
+ if (TTI.isSourceOfDivergence(&Arg)) {
+ Worklist.push_back(&Arg);
+ DV.insert(&Arg);
+ }
+ }
+}
+
+void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {
+ // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
+ // immediate post dominator are divergent. This rule handles if-then-else
+ // patterns. For example,
+ //
+ // if (tid < 5)
+ // a1 = 1;
+ // else
+ // a2 = 2;
+ // a = phi(a1, a2); // sync dependent on (tid < 5)
+ BasicBlock *ThisBB = TI->getParent();
+ BasicBlock *IPostDom = PDT.getNode(ThisBB)->getIDom()->getBlock();
+ if (IPostDom == nullptr)
+ return;
+
+ for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
+ // A PHINode is uniform if it returns the same value no matter which path is
+ // taken.
+ if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(I).second)
+ Worklist.push_back(I);
+ }
+
+ // Propagation rule 2: if a value defined in a loop is used outside, the user
+ // is sync dependent on the condition of the loop exits that dominate the
+ // user. For example,
+ //
+ // int i = 0;
+ // do {
+ // i++;
+ // if (foo(i)) ... // uniform
+ // } while (i < tid);
+ // if (bar(i)) ... // divergent
+ //
+ // A program may contain unstructured loops. Therefore, we cannot leverage
+ // LoopInfo, which only recognizes natural loops.
+ //
+ // The algorithm used here handles both natural and unstructured loops. Given
+ // a branch TI, we first compute its influence region, the union of all simple
+ // paths from TI to its immediate post dominator (IPostDom). Then, we search
+ // for all the values defined in the influence region but used outside. All
+ // these users are sync dependent on TI.
+ DenseSet<BasicBlock *> InfluenceRegion;
+ computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
+ // An insight that can speed up the search process is that all the in-region
+ // values that are used outside must dominate TI. Therefore, instead of
+ // searching every basic block in the influence region, we search all the
+ // dominators of TI until it is outside the influence region.
+ BasicBlock *InfluencedBB = ThisBB;
+ while (InfluenceRegion.count(InfluencedBB)) {
+ for (auto &I : *InfluencedBB)
+ findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+ DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
+ if (IDomNode == nullptr)
+ break;
+ InfluencedBB = IDomNode->getBlock();
+ }
+}
+
+void DivergencePropagator::findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
+ for (User *U : I.users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (!InfluenceRegion.count(UserInst->getParent())) {
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+ }
+}
+
+void DivergencePropagator::computeInfluenceRegion(
+ BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion) {
+ assert(PDT.properlyDominates(End, Start) &&
+ "End does not properly dominate Start");
+ std::vector<BasicBlock *> InfluenceStack;
+ InfluenceStack.push_back(Start);
+ InfluenceRegion.insert(Start);
+ while (!InfluenceStack.empty()) {
+ BasicBlock *BB = InfluenceStack.back();
+ InfluenceStack.pop_back();
+ for (BasicBlock *Succ : successors(BB)) {
+ if (End != Succ && InfluenceRegion.insert(Succ).second)
+ InfluenceStack.push_back(Succ);
+ }
+ }
+}
+
+void DivergencePropagator::exploreDataDependency(Value *V) {
+ // Follow def-use chains of V.
+ for (User *U : V->users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+}
+
+void DivergencePropagator::propagate() {
+ // Traverse the dependency graph using DFS.
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) {
+ // Terminators with less than two successors won't introduce sync
+ // dependency. Ignore them.
+ if (TI->getNumSuccessors() > 1)
+ exploreSyncDependency(TI);
+ }
+ exploreDataDependency(V);
+ }
+}
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createDivergenceAnalysisPass() {
+ return new DivergenceAnalysis();
+}
+
+bool DivergenceAnalysis::runOnFunction(Function &F) {
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ if (TTIWP == nullptr)
+ return false;
+
+ TargetTransformInfo &TTI = TTIWP->getTTI(F);
+ // Fast path: if the target does not have branch divergence, we do not mark
+ // any branch as divergent.
+ if (!TTI.hasBranchDivergence())
+ return false;
+
+ DivergentValues.clear();
+ DivergencePropagator DP(F, TTI,
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<PostDominatorTree>(), DivergentValues);
+ DP.populateWithSourcesOfDivergence();
+ DP.propagate();
+ return false;
+}
+
+void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
+ if (DivergentValues.empty())
+ return;
+ const Value *FirstDivergentValue = *DivergentValues.begin();
+ const Function *F;
+ if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
+ F = Arg->getParent();
+ } else if (const Instruction *I =
+ dyn_cast<Instruction>(FirstDivergentValue)) {
+ F = I->getParent()->getParent();
+ } else {
+ llvm_unreachable("Only arguments and instructions can be divergent");
+ }
+
+ // Dumps all divergent values in F, arguments and then instructions.
+ for (auto &Arg : F->args()) {
+ if (DivergentValues.count(&Arg))
+ OS << "DIVERGENT: " << Arg << "\n";
+ }
+ // Iterate instructions using inst_range to ensure a deterministic order.
+ for (auto &I : inst_range(F)) {
+ if (DivergentValues.count(&I))
+ OS << "DIVERGENT:" << I << "\n";
+ }
+}
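A standalone sketch of the influence-region walk that computeInfluenceRegion performs: a DFS over successors from Start that never crosses End, collecting every block on a not-yet-converged path. This editor-added example uses a toy string-keyed CFG rather than LLVM data structures.

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

using CFG = std::map<std::string, std::vector<std::string>>;

std::set<std::string> influenceRegion(const CFG &G, const std::string &Start,
                                      const std::string &End) {
  std::set<std::string> Region{Start};
  std::vector<std::string> Stack{Start};
  while (!Stack.empty()) {
    std::string BB = Stack.back();
    Stack.pop_back();
    for (const auto &Succ : G.at(BB))
      if (Succ != End && Region.insert(Succ).second)  // stop at the post-dominator
        Stack.push_back(Succ);
  }
  return Region;
}

int main() {
  // entry -> {then, else}; then -> merge; else -> merge: the if-then-else from
  // the file header comment, with "merge" as the immediate post-dominator.
  CFG G{{"entry", {"then", "else"}}, {"then", {"merge"}},
        {"else", {"merge"}}, {"merge", {}}};
  for (const auto &BB : influenceRegion(G, "entry", "merge"))
    std::printf("%s\n", BB.c_str());  // region is {entry, then, else}
  return 0;
}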
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 9d607cc..65ba1c7 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -212,10 +212,13 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
// list of the same call.
CallSites.count(I->first) ||
- // If the call edge is not from a call or invoke, then the function
- // pass RAUW'd a call with another value. This can happen when
- // constant folding happens of well known functions etc.
- !CallSite(I->first)) {
+ // If the call edge is not from a call or invoke, or it is an
+ // intrinsic call, then the function pass RAUW'd a call with
+ // another value. This can happen when constant folding happens
+ // of well known functions etc.
+ !CallSite(I->first) ||
+ (CallSite(I->first).getCalledFunction() &&
+ CallSite(I->first).getCalledFunction()->isIntrinsic())) {
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 2208f32..018ae99 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -269,7 +269,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
} else if (Operator::getOpcode(I) == Instruction::BitCast) {
if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
return true;
- } else if (CallSite CS = I) {
+ } else if (auto CS = CallSite(I)) {
// Make sure that this is just the function being called, not that it is
// passing into the function.
if (!CS.isCallee(&U)) {
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index eeb3b87..cacf70d 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -64,6 +64,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool ContainsNoDuplicateCall;
bool HasReturn;
bool HasIndirectBr;
+ bool HasFrameEscape;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -148,12 +149,12 @@ public:
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
- FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0),
- SROACostSavingsLost(0) {}
+ HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
+ NumVectorInstructions(0), FiftyPercentVectorBonus(0),
+ TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
+ NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+ NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+ SROACostSavings(0), SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -743,6 +744,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
case Intrinsic::memmove:
// SROA can usually chew through these intrinsics, but they aren't free.
return false;
+ case Intrinsic::frameescape:
+ HasFrameEscape = true;
+ return false;
}
}
@@ -941,7 +945,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
// If visiting this instruction detected an uninlinable pattern, abort.
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr)
+ HasIndirectBr || HasFrameEscape)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -1171,7 +1175,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// returns false, and we can bail out.
if (!analyzeBlock(BB, EphValues)) {
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr)
+ HasIndirectBr || HasFrameEscape)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -1286,16 +1290,18 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
/// \brief Test that two functions either both have or both lack the given
/// attribute.
-static bool attributeMatches(Function *F1, Function *F2,
- Attribute::AttrKind Attr) {
- return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
+template<typename AttrKind>
+static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
+ return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
}
/// \brief Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
Function *Callee) {
- return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
+ return attributeMatches(Caller, Callee, "target-cpu") &&
+ attributeMatches(Caller, Callee, "target-features") &&
+ attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
attributeMatches(Caller, Callee, Attribute::SanitizeThread);
}
@@ -1370,6 +1376,13 @@ bool InlineCostAnalysis::isInlineViable(Function &F) {
if (!ReturnsTwice && CS.isCall() &&
cast<CallInst>(CS.getInstruction())->canReturnTwice())
return false;
+
+ // Disallow inlining functions that call @llvm.frameescape. Doing this
+ // correctly would require major changes to the inliner.
+ if (CS.getCalledFunction() &&
+ CS.getCalledFunction()->getIntrinsicID() ==
+ llvm::Intrinsic::frameescape)
+ return false;
}
}
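The templated attributeMatches above now also gates inlining on string attributes such as "target-cpu" and "target-features". A minimal stand-in for that gate, with invented names and a plain map instead of LLVM's attribute sets, added here only to illustrate the comparison being made:

#include <cassert>
#include <map>
#include <string>

using AttrMap = std::map<std::string, std::string>;

bool attributeMatches(const AttrMap &Caller, const AttrMap &Callee, const std::string &Key) {
  auto C = Caller.find(Key), E = Callee.find(Key);
  std::string CV = C == Caller.end() ? "" : C->second;
  std::string EV = E == Callee.end() ? "" : E->second;
  return CV == EV;  // both absent, or both present and identical
}

int main() {
  AttrMap Caller{{"target-cpu", "cortex-a57"}}, Callee{{"target-cpu", "cortex-a53"}};
  assert(!attributeMatches(Caller, Callee, "target-cpu"));       // mismatch blocks inlining
  assert(attributeMatches(Caller, Callee, "target-features"));   // both absent: compatible
  return 0;
}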
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 99c477d..d45f7bd 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -2978,10 +2978,12 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// what constant folding can make out of it.
Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType());
SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end());
- Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS);
+ Constant *NewLHS = ConstantExpr::getGetElementPtr(
+ GLHS->getSourceElementType(), Null, IndicesLHS);
SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end());
- Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS);
+ Constant *NewRHS = ConstantExpr::getGetElementPtr(
+ GLHS->getSourceElementType(), Null, IndicesRHS);
return ConstantExpr::getICmp(Pred, NewLHS, NewRHS);
}
}
@@ -3241,17 +3243,18 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
+static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+ const Query &Q, unsigned) {
// The type of the GEP pointer operand.
- PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()->getScalarType());
- unsigned AS = PtrTy->getAddressSpace();
+ unsigned AS =
+ cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace();
// getelementptr P -> P.
if (Ops.size() == 1)
return Ops[0];
// Compute the (pointer) type returned by the GEP instruction.
- Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1));
+ Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1));
Type *GEPTy = PointerType::get(LastType, AS);
if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
GEPTy = VectorType::get(GEPTy, VT->getNumElements());
@@ -3264,7 +3267,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
if (match(Ops[1], m_Zero()))
return Ops[0];
- Type *Ty = PtrTy->getElementType();
+ Type *Ty = SrcTy;
if (Ty->isSized()) {
Value *P;
uint64_t C;
@@ -3318,14 +3321,17 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
if (!isa<Constant>(Ops[i]))
return nullptr;
- return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
+ return ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]),
+ Ops.slice(1));
}
Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyGEPInst(Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+ return ::SimplifyGEPInst(
+ cast<PointerType>(Ops[0]->getType()->getScalarType())->getElementType(),
+ Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 1818e93..724c21f 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -177,6 +177,17 @@ void LoopAccessInfo::RuntimePointerCheck::print(
}
}
+bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
+ const SmallVectorImpl<int> *PtrPartition) const {
+ unsigned NumPointers = Pointers.size();
+
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J, PtrPartition))
+ return true;
+ return false;
+}
+
namespace {
/// \brief Analyses memory accesses in a loop.
///
@@ -1033,16 +1044,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
StoreInst *ST = cast<StoreInst>(*I);
Value* Ptr = ST->getPointerOperand();
-
- if (isUniform(Ptr)) {
- emitAnalysis(
- LoopAccessReport(ST)
- << "write to a loop invariant address could not be vectorized");
- DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n");
- CanVecMem = false;
- return;
- }
-
+ // Check for store to loop invariant address.
+ StoreToLoopInvariantAddress |= isUniform(Ptr);
// If we did *not* see this pointer before, insert it to the read-write
// list. At this phase it is only a 'write' list.
if (Seen.insert(Ptr).second) {
@@ -1211,9 +1214,8 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
- Instruction *tnullptr = nullptr;
if (!PtrRtCheck.Need)
- return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
+ return std::make_pair(nullptr, nullptr);
unsigned NumPointers = PtrRtCheck.Pointers.size();
SmallVector<TrackingVH<Value> , 2> Starts;
@@ -1284,6 +1286,9 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
}
}
+ if (!MemoryRuntimeCheck)
+ return std::make_pair(nullptr, nullptr);
+
// We have to do this trickery because the IRBuilder might fold the check to a
// constant expression in which case there is no Instruction anchored in a
// the block.
@@ -1301,19 +1306,24 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const ValueToValueMap &Strides)
: DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0),
- MaxSafeDepDistBytes(-1U), CanVecMem(false) {
+ MaxSafeDepDistBytes(-1U), CanVecMem(false),
+ StoreToLoopInvariantAddress(false) {
if (canAnalyzeLoop())
analyzeLoop(Strides);
}
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (CanVecMem) {
- if (PtrRtCheck.empty())
- OS.indent(Depth) << "Memory dependences are safe\n";
- else
+ if (PtrRtCheck.Need)
OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
+ else
+ OS.indent(Depth) << "Memory dependences are safe\n";
}
+ OS.indent(Depth) << "Store to invariant address was "
+ << (StoreToLoopInvariantAddress ? "" : "not ")
+ << "found in loop.\n";
+
if (Report)
OS.indent(Depth) << "Report: " << Report->str() << "\n";
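The new needsAnyChecking helper added to RuntimePointerCheck is a plain pairwise scan over pointer indices. The same shape in isolation, with a toy predicate standing in for the real per-pair aliasing test (editor-added sketch):

#include <cstdio>
#include <functional>

bool needsAnyChecking(unsigned NumPointers,
                      const std::function<bool(unsigned, unsigned)> &NeedsChecking) {
  for (unsigned I = 0; I < NumPointers; ++I)
    for (unsigned J = I + 1; J < NumPointers; ++J)
      if (NeedsChecking(I, J))
        return true;  // at least one pair still requires a run-time check
  return false;
}

int main() {
  // Pointers 0 and 2 may overlap; every other pair is provably disjoint.
  auto MayOverlap = [](unsigned I, unsigned J) { return I == 0 && J == 2; };
  std::printf("%d\n", needsAnyChecking(3, MayOverlap));  // prints 1
  return 0;
}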
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index e1b7b4b..da3b829 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -106,7 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
if (!Res.isNonLocal()) {
Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
static_cast<BasicBlock *>(nullptr)));
- } else if (CallSite CS = cast<Value>(Inst)) {
+ } else if (auto CS = CallSite(Inst)) {
const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
MDA.getNonLocalCallDependency(CS);
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 716e3e6..84769cb 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -223,7 +223,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
continue;
}
- if (CallSite InstCS = cast<Value>(Inst)) {
+ if (auto InstCS = CallSite(Inst)) {
// Debug intrinsics don't cause dependences.
if (isa<DbgInfoIntrinsic>(Inst)) continue;
// If these two calls do not interfere, look past it.
@@ -874,23 +874,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
void MemoryDependenceAnalysis::
getNonLocalPointerDependency(Instruction *QueryInst,
SmallVectorImpl<NonLocalDepResult> &Result) {
-
- auto getLocation = [](AliasAnalysis *AA, Instruction *Inst) {
- if (auto *I = dyn_cast<LoadInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<StoreInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<VAArgInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<AtomicRMWInst>(Inst))
- return AA->getLocation(I);
- else
- llvm_unreachable("unsupported memory instruction");
- };
-
- const AliasAnalysis::Location Loc = getLocation(AA, QueryInst);
+ const AliasAnalysis::Location Loc = AA->getLocation(QueryInst);
bool isLoad = isa<LoadInst>(QueryInst);
BasicBlock *FromBB = QueryInst->getParent();
assert(FromBB);
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index cbc4700..f2a11cb 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -72,55 +72,53 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
// Printing the nodes directly isn't particularly helpful (since they
// reference other nodes that won't be printed, particularly for the
// filenames), so just print a few useful things.
- for (DICompileUnit CU : Finder.compile_units()) {
+ for (MDCompileUnit *CU : Finder.compile_units()) {
O << "Compile unit: ";
- if (const char *Lang = LanguageString(CU.getLanguage()))
+ if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage()))
O << Lang;
else
- O << "unknown-language(" << CU.getLanguage() << ")";
- printFile(O, CU.getFilename(), CU.getDirectory());
+ O << "unknown-language(" << CU->getSourceLanguage() << ")";
+ printFile(O, CU->getFilename(), CU->getDirectory());
O << '\n';
}
- for (DISubprogram S : Finder.subprograms()) {
- O << "Subprogram: " << S.getName();
- printFile(O, S.getFilename(), S.getDirectory(), S.getLineNumber());
- if (!S.getLinkageName().empty())
- O << " ('" << S.getLinkageName() << "')";
+ for (MDSubprogram *S : Finder.subprograms()) {
+ O << "Subprogram: " << S->getName();
+ printFile(O, S->getFilename(), S->getDirectory(), S->getLine());
+ if (!S->getLinkageName().empty())
+ O << " ('" << S->getLinkageName() << "')";
O << '\n';
}
for (DIGlobalVariable GV : Finder.global_variables()) {
- O << "Global variable: " << GV.getName();
- printFile(O, GV.getFilename(), GV.getDirectory(), GV.getLineNumber());
- if (!GV.getLinkageName().empty())
- O << " ('" << GV.getLinkageName() << "')";
+ O << "Global variable: " << GV->getName();
+ printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine());
+ if (!GV->getLinkageName().empty())
+ O << " ('" << GV->getLinkageName() << "')";
O << '\n';
}
- for (DIType T : Finder.types()) {
+ for (const MDType *T : Finder.types()) {
O << "Type:";
- if (!T.getName().empty())
- O << ' ' << T.getName();
- printFile(O, T.getFilename(), T.getDirectory(), T.getLineNumber());
- if (T.isBasicType()) {
- DIBasicType BT(T.get());
+ if (!T->getName().empty())
+ O << ' ' << T->getName();
+ printFile(O, T->getFilename(), T->getDirectory(), T->getLine());
+ if (auto *BT = dyn_cast<MDBasicType>(T)) {
O << " ";
if (const char *Encoding =
- dwarf::AttributeEncodingString(BT.getEncoding()))
+ dwarf::AttributeEncodingString(BT->getEncoding()))
O << Encoding;
else
- O << "unknown-encoding(" << BT.getEncoding() << ')';
+ O << "unknown-encoding(" << BT->getEncoding() << ')';
} else {
O << ' ';
- if (const char *Tag = dwarf::TagString(T.getTag()))
+ if (const char *Tag = dwarf::TagString(T->getTag()))
O << Tag;
else
- O << "unknown-tag(" << T.getTag() << ")";
+ O << "unknown-tag(" << T->getTag() << ")";
}
- if (T.isCompositeType()) {
- DICompositeType CT(T.get());
- if (auto *S = CT.getIdentifier())
+ if (auto *CT = dyn_cast<MDCompositeType>(T)) {
+ if (auto *S = CT->getRawIdentifier())
O << " (identifier: '" << S->getString() << "')";
}
O << '\n';
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index cd1e944..5e1cdd4 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -199,7 +199,7 @@ public:
bool runOnRegion(Region *R, RGPassManager &RGM) override {
Out << Banner;
- for (const auto &BB : R->blocks()) {
+ for (const auto *BB : R->blocks()) {
if (BB)
BB->print(Out);
else
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index ad83113..d7f5109 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -123,7 +123,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
const RegionInfo &RI = *static_cast<const RegionInfo*>(R.getRegionInfo());
- for (const auto &BB : R.blocks())
+ for (auto *BB : R.blocks())
if (RI.getRegionFor(BB) == &R)
O.indent(2 * (depth + 1)) << "Node"
<< static_cast<const void*>(RI.getTopLevelRegion()->getBBNode(BB))
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 4e713fb..37377f0 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -5690,7 +5690,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
if (PTy->getElementType()->isStructTy())
C2 = ConstantExpr::getIntegerCast(
C2, Type::getInt32Ty(C->getContext()), true);
- C = ConstantExpr::getGetElementPtr(C, C2);
+ C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
} else
C = ConstantExpr::getAdd(C, C2);
}
@@ -6698,6 +6698,65 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
return true;
}
+ struct ClearWalkingBEDominatingCondsOnExit {
+ ScalarEvolution &SE;
+
+ explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE)
+ : SE(SE) {}
+
+ ~ClearWalkingBEDominatingCondsOnExit() {
+ SE.WalkingBEDominatingConds = false;
+ }
+ };
+
+ // We don't want more than one activation of the following loop on the stack
+ // -- that can lead to O(n!) time complexity.
+ if (WalkingBEDominatingConds)
+ return false;
+
+ WalkingBEDominatingConds = true;
+ ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this);
+
+ // If the loop is not reachable from the entry block, we risk running into an
+ // infinite loop as we walk up into the dom tree. These loops do not matter
+ // anyway, so we just return a conservative answer when we see them.
+ if (!DT->isReachableFromEntry(L->getHeader()))
+ return false;
+
+ for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()];
+ DTN != HeaderDTN;
+ DTN = DTN->getIDom()) {
+
+ assert(DTN && "should reach the loop header before reaching the root!");
+
+ BasicBlock *BB = DTN->getBlock();
+ BasicBlock *PBB = BB->getSinglePredecessor();
+ if (!PBB)
+ continue;
+
+ BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
+ if (!ContinuePredicate || !ContinuePredicate->isConditional())
+ continue;
+
+ Value *Condition = ContinuePredicate->getCondition();
+
+ // If we have an edge `E` within the loop body that dominates the only
+ // latch, the condition guarding `E` also guards the backedge. This
+ // reasoning works only for loops with a single latch.
+
+ BasicBlockEdge DominatingEdge(PBB, BB);
+ if (DominatingEdge.isSingleEdge()) {
+ // We're constructively (and conservatively) enumerating edges within the
+ // loop body that dominate the latch. The dominator tree better agree
+ // with us on this:
+ assert(DT->dominates(DominatingEdge, Latch) && "should be!");
+
+ if (isImpliedCond(Pred, LHS, RHS, Condition,
+ BB != ContinuePredicate->getSuccessor(0)))
+ return true;
+ }
+ }
+
return false;
}
@@ -7968,8 +8027,8 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64),
- BlockDispositions(64), FirstUnknown(nullptr) {
+ : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64),
+ LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) {
initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}
@@ -8000,6 +8059,7 @@ void ScalarEvolution::releaseMemory() {
}
assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
+ assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
BackedgeTakenCounts.clear();
ConstantEvolutionLoopExitValue.clear();
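The ClearWalkingBEDominatingCondsOnExit struct introduced above is a small RAII guard that resets the re-entrancy flag on every exit path from the dominator walk. The same pattern in isolation, with illustrative names rather than the committed code:

#include <cassert>

struct Analysis {
  bool Walking = false;

  struct ClearOnExit {
    Analysis &A;
    explicit ClearOnExit(Analysis &A) : A(A) {}
    ~ClearOnExit() { A.Walking = false; }  // reset even on early return
  };

  bool query() {
    if (Walking)        // refuse a second activation: avoids pathological blow-up
      return false;
    Walking = true;
    ClearOnExit Guard(*this);
    // ... walk dominators; every return path clears Walking via ~ClearOnExit.
    return true;
  }
};

int main() {
  Analysis A;
  bool R = A.query();
  assert(R && !A.Walking);
  return 0;
}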
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index a73ec9e..0bd427b 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -488,7 +489,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// Fold a GEP with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(V))
if (Constant *CRHS = dyn_cast<Constant>(Idx))
- return ConstantExpr::getGetElementPtr(CLHS, CRHS);
+ return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()),
+ CLHS, CRHS);
// Do a quick scan to see if we have this GEP nearby. If so, reuse it.
unsigned ScanLimit = 6;
@@ -523,7 +525,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
// Emit a GEP.
- Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
+ Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
rememberInstruction(GEP);
return GEP;
@@ -1803,6 +1805,72 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
+bool SCEVExpander::isHighCostExpansionHelper(
+ const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) {
+ if (!Processed.insert(S).second)
+ return false;
+
+ if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
+ // If the divisor is a power of two and the SCEV type fits in a native
+ // integer, consider the division cheap irrespective of whether it occurs in
+ // the user code since it can be lowered into a right shift.
+ if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
+ if (SC->getValue()->getValue().isPowerOf2()) {
+ const DataLayout &DL =
+ L->getHeader()->getParent()->getParent()->getDataLayout();
+ unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
+ return DL.isIllegalInteger(Width);
+ }
+
+ // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
+ // HowManyLessThans produced to compute a precise expression, rather than a
+ // UDiv from the user's code. If we can't find a UDiv in the code with some
+ // simple searching, assume the former and consider UDivExpr expensive to
+ // compute.
+ BasicBlock *ExitingBB = L->getExitingBlock();
+ if (!ExitingBB)
+ return true;
+
+ BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!ExitingBI || !ExitingBI->isConditional())
+ return true;
+
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition());
+ if (!OrigCond)
+ return true;
+
+ const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1));
+ RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1));
+ if (RHS != S) {
+ const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0));
+ LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1));
+ if (LHS != S)
+ return true;
+ }
+ }
+
+ // Recurse past add expressions, which commonly occur in the
+ // BackedgeTakenCount. They may already exist in program code, and if not,
+ // they are not too expensive to rematerialize.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ if (isHighCostExpansionHelper(*I, L, Processed))
+ return true;
+ }
+ return false;
+ }
+
+ // HowManyLessThans uses a Max expression whenever the loop is not guarded by
+ // the exit condition.
+ if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+ return true;
+
+ // If we haven't recognized an expensive SCEV pattern, assume it's an
+ // expression produced by program code.
+ return false;
+}
+
namespace {
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
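The isHighCostExpansionHelper heuristic above treats a UDiv by a power-of-two constant as cheap when the integer type is legal for the target, because such a division lowers to a single logical right shift. A rough sketch of that reasoning in plain C++ (not the LLVM API; isPowerOf2 here mirrors what APInt::isPowerOf2 computes):

#include <cassert>
#include <cstdint>

// A non-zero value with a single set bit is a power of two.
static bool isPowerOf2(uint64_t X) { return X && (X & (X - 1)) == 0; }

// Unsigned division by a power of two is a shift by log2(Divisor).
static uint64_t divideByPow2(uint64_t Value, uint64_t Divisor) {
  assert(isPowerOf2(Divisor) && "expected a power-of-two divisor");
  unsigned Shift = 0;
  while ((1ULL << Shift) != Divisor)
    ++Shift;
  return Value >> Shift;
}

int main() {
  assert(divideByPow2(40, 8) == 40 / 8);   // 40 >> 3 == 5
  assert(!isPowerOf2(12) && "12 would still need a real divide");
}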
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 7e574d5..8b378a3 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -409,9 +409,7 @@ static StringRef sanitizeFunctionName(StringRef funcName) {
// Check for \01 prefix that is used to mangle __asm declarations and
// strip it if present.
- if (funcName.front() == '\01')
- funcName = funcName.substr(1);
- return funcName;
+ return GlobalValue::getRealLinkageName(funcName);
}
bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
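sanitizeFunctionName now defers to GlobalValue::getRealLinkageName instead of stripping the '\01' prefix by hand; the prefix marks a name whose mangling must be taken verbatim, as produced for __asm declarations. A tiny stand-alone sketch of what the stripping amounts to, using std::string rather than StringRef and an invented helper name:

#include <cassert>
#include <string>

// Drop the leading '\1' byte that flags a name as "already mangled".
static std::string stripMangleEscape(const std::string &Name) {
  if (!Name.empty() && Name[0] == '\1')
    return Name.substr(1);
  return Name;
}

int main() {
  assert(stripMangleEscape("\1_memcpy") == "_memcpy");
  assert(stripMangleEscape("memcpy") == "memcpy");
}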
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index f51c7f54..a1519de 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -76,6 +76,10 @@ bool TargetTransformInfo::hasBranchDivergence() const {
return TTIImpl->hasBranchDivergence();
}
+bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
+ return TTIImpl->isSourceOfDivergence(V);
+}
+
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return TTIImpl->isLoweredToCall(F);
}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index f329e3a..3651301 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -694,10 +694,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// We're running this loop once for each value queried, resulting in a
// runtime of ~O(#assumes * #values).
- assert(isa<IntrinsicInst>(I) &&
- dyn_cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::assume &&
+ assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
"must be an assume intrinsic");
-
+
Value *Arg = I->getArgOperand(0);
if (Arg == V && isValidAssumeForContext(I, Q)) {
@@ -2935,7 +2934,7 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
if (const LoadInst *LI = dyn_cast<LoadInst>(V))
return LI->getMetadata(LLVMContext::MD_nonnull);
- if (ImmutableCallSite CS = V)
+ if (auto CS = ImmutableCallSite(V))
if (CS.isReturnNonNull())
return true;
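The isKnownNonNull change swaps the old declaration-in-condition form, ImmutableCallSite CS = V, for an explicit construction, auto CS = ImmutableCallSite(V). The motivation is presumably that the call-site wrapper should no longer be built through an implicit conversion inside a condition; spelling the construction out keeps the same short-circuit behaviour. A generic sketch of the idiom with an invented Wrapper type (not the real ImmutableCallSite):

#include <cassert>

struct Wrapper {
  const void *P;
  explicit Wrapper(const void *P) : P(P) {}
  explicit operator bool() const { return P != nullptr; }
};

int main() {
  int X = 0;
  // "if (Wrapper W = &X)" would be ill-formed with an explicit constructor.
  if (auto W = Wrapper(&X))   // the replacement idiom used in the patch
    assert(W.P == &X);
}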
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 3bf090a..a72f713 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -598,6 +598,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(inalloca);
KEYWORD(cold);
KEYWORD(dereferenceable);
+ KEYWORD(dereferenceable_or_null);
KEYWORD(inlinehint);
KEYWORD(inreg);
KEYWORD(jumptable);
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 3343168..90bf17d 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -45,7 +45,6 @@ namespace llvm {
public:
explicit LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &,
LLVMContext &C);
- ~LLLexer() {}
lltok::Kind Lex() {
return CurKind = LexToken();
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 103c8c4..546363b 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -976,6 +976,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
break;
case lltok::kw_byval:
case lltok::kw_dereferenceable:
+ case lltok::kw_dereferenceable_or_null:
case lltok::kw_inalloca:
case lltok::kw_nest:
case lltok::kw_noalias:
@@ -1220,11 +1221,18 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break;
case lltok::kw_dereferenceable: {
uint64_t Bytes;
- if (ParseOptionalDereferenceableBytes(Bytes))
+ if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
return true;
B.addDereferenceableAttr(Bytes);
continue;
}
+ case lltok::kw_dereferenceable_or_null: {
+ uint64_t Bytes;
+ if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable_or_null, Bytes))
+ return true;
+ B.addDereferenceableOrNullAttr(Bytes);
+ continue;
+ }
case lltok::kw_inalloca: B.addAttribute(Attribute::InAlloca); break;
case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
case lltok::kw_nest: B.addAttribute(Attribute::Nest); break;
@@ -1284,11 +1292,18 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
return HaveError;
case lltok::kw_dereferenceable: {
uint64_t Bytes;
- if (ParseOptionalDereferenceableBytes(Bytes))
+ if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
return true;
B.addDereferenceableAttr(Bytes);
continue;
}
+ case lltok::kw_dereferenceable_or_null: {
+ uint64_t Bytes;
+ if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable_or_null, Bytes))
+ return true;
+ B.addDereferenceableOrNullAttr(Bytes);
+ continue;
+ }
case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break;
case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break;
@@ -1516,12 +1531,19 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
return false;
}
-/// ParseOptionalDereferenceableBytes
+/// ParseOptionalDerefAttrBytes
/// ::= /* empty */
-/// ::= 'dereferenceable' '(' 4 ')'
-bool LLParser::ParseOptionalDereferenceableBytes(uint64_t &Bytes) {
+/// ::= AttrKind '(' 4 ')'
+///
+/// where AttrKind is either 'dereferenceable' or 'dereferenceable_or_null'.
+bool LLParser::ParseOptionalDerefAttrBytes(lltok::Kind AttrKind,
+ uint64_t &Bytes) {
+ assert((AttrKind == lltok::kw_dereferenceable ||
+ AttrKind == lltok::kw_dereferenceable_or_null) &&
+ "contract!");
+
Bytes = 0;
- if (!EatIfPresent(lltok::kw_dereferenceable))
+ if (!EatIfPresent(AttrKind))
return false;
LocTy ParenLoc = Lex.getLoc();
if (!EatIfPresent(lltok::lparen))
@@ -2831,10 +2853,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
!BasePointerType->getElementType()->isSized(&Visited))
return Error(ID.Loc, "base element of getelementptr must be sized");
- if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), Indices))
+ if (!GetElementPtrInst::getIndexedType(Ty, Indices))
return Error(ID.Loc, "invalid getelementptr indices");
- ID.ConstantVal = ConstantExpr::getGetElementPtr(Elts[0], Indices,
- InBounds);
+ ID.ConstantVal =
+ ConstantExpr::getGetElementPtr(Ty, Elts[0], Indices, InBounds);
} else if (Opc == Instruction::Select) {
if (Elts.size() != 3)
return Error(ID.Loc, "expected three operands to select");
@@ -3030,13 +3052,17 @@ struct MDBoolField : public MDFieldImpl<bool> {
MDBoolField(bool Default = false) : ImplTy(Default) {}
};
struct MDField : public MDFieldImpl<Metadata *> {
- MDField() : ImplTy(nullptr) {}
+ bool AllowNull;
+
+ MDField(bool AllowNull = true) : ImplTy(nullptr), AllowNull(AllowNull) {}
};
struct MDConstant : public MDFieldImpl<ConstantAsMetadata *> {
MDConstant() : ImplTy(nullptr) {}
};
-struct MDStringField : public MDFieldImpl<std::string> {
- MDStringField() : ImplTy(std::string()) {}
+struct MDStringField : public MDFieldImpl<MDString *> {
+ bool AllowEmpty;
+ MDStringField(bool AllowEmpty = true)
+ : ImplTy(nullptr), AllowEmpty(AllowEmpty) {}
};
struct MDFieldList : public MDFieldImpl<SmallVector<Metadata *, 4>> {
MDFieldList() : ImplTy(SmallVector<Metadata *, 4>()) {}
@@ -3161,7 +3187,7 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) {
if (Lex.getKind() != lltok::DIFlag)
return TokError("expected debug info flag");
- Val = DIDescriptor::getFlag(Lex.getStrVal());
+ Val = DebugNode::getFlag(Lex.getStrVal());
if (!Val)
return TokError(Twine("invalid debug info flag flag '") +
Lex.getStrVal() + "'");
@@ -3221,6 +3247,8 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDBoolField &Result) {
template <>
bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDField &Result) {
if (Lex.getKind() == lltok::kw_null) {
+ if (!Result.AllowNull)
+ return TokError("'" + Name + "' cannot be null");
Lex.Lex();
Result.assign(nullptr);
return false;
@@ -3246,11 +3274,15 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDConstant &Result) {
template <>
bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDStringField &Result) {
+ LocTy ValueLoc = Lex.getLoc();
std::string S;
if (ParseStringConstant(S))
return true;
- Result.assign(std::move(S));
+ if (!Result.AllowEmpty && S.empty())
+ return Error(ValueLoc, "'" + Name + "' cannot be empty");
+
+ Result.assign(S.empty() ? nullptr : MDString::get(Context, S));
return false;
}
@@ -3343,7 +3375,7 @@ bool LLParser::ParseMDLocation(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(line, LineField, ); \
OPTIONAL(column, ColumnField, ); \
- REQUIRED(scope, MDField, ); \
+ REQUIRED(scope, MDField, (/* AllowNull */ false)); \
OPTIONAL(inlinedAt, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -3499,7 +3531,7 @@ bool LLParser::ParseMDFile(MDNode *&Result, bool IsDistinct) {
bool LLParser::ParseMDCompileUnit(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(language, DwarfLangField, ); \
- REQUIRED(file, MDField, ); \
+ REQUIRED(file, MDField, (/* AllowNull */ false)); \
OPTIONAL(producer, MDStringField, ); \
OPTIONAL(isOptimized, MDBoolField, ); \
OPTIONAL(flags, MDStringField, ); \
@@ -3567,7 +3599,7 @@ bool LLParser::ParseMDSubprogram(MDNode *&Result, bool IsDistinct) {
/// ::= !MDLexicalBlock(scope: !0, file: !2, line: 7, column: 9)
bool LLParser::ParseMDLexicalBlock(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
- REQUIRED(scope, MDField, ); \
+ REQUIRED(scope, MDField, (/* AllowNull */ false)); \
OPTIONAL(file, MDField, ); \
OPTIONAL(line, LineField, ); \
OPTIONAL(column, ColumnField, );
@@ -3583,7 +3615,7 @@ bool LLParser::ParseMDLexicalBlock(MDNode *&Result, bool IsDistinct) {
/// ::= !MDLexicalBlockFile(scope: !0, file: !2, discriminator: 9)
bool LLParser::ParseMDLexicalBlockFile(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
- REQUIRED(scope, MDField, ); \
+ REQUIRED(scope, MDField, (/* AllowNull */ false)); \
OPTIONAL(file, MDField, ); \
REQUIRED(discriminator, MDUnsignedField, (0, UINT32_MAX));
PARSE_MD_FIELDS();
@@ -3648,8 +3680,8 @@ bool LLParser::ParseMDTemplateValueParameter(MDNode *&Result, bool IsDistinct) {
/// declaration: !3)
bool LLParser::ParseMDGlobalVariable(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(name, MDStringField, (/* AllowEmpty */ false)); \
OPTIONAL(scope, MDField, ); \
- OPTIONAL(name, MDStringField, ); \
OPTIONAL(linkageName, MDStringField, ); \
OPTIONAL(file, MDField, ); \
OPTIONAL(line, LineField, ); \
@@ -3670,25 +3702,23 @@ bool LLParser::ParseMDGlobalVariable(MDNode *&Result, bool IsDistinct) {
/// ParseMDLocalVariable:
/// ::= !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !0, name: "foo",
-/// file: !1, line: 7, type: !2, arg: 2, flags: 7,
-/// inlinedAt: !3)
+/// file: !1, line: 7, type: !2, arg: 2, flags: 7)
bool LLParser::ParseMDLocalVariable(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(tag, DwarfTagField, ); \
- OPTIONAL(scope, MDField, ); \
+ REQUIRED(scope, MDField, (/* AllowNull */ false)); \
OPTIONAL(name, MDStringField, ); \
OPTIONAL(file, MDField, ); \
OPTIONAL(line, LineField, ); \
OPTIONAL(type, MDField, ); \
OPTIONAL(arg, MDUnsignedField, (0, UINT8_MAX)); \
- OPTIONAL(flags, DIFlagField, ); \
- OPTIONAL(inlinedAt, MDField, );
+ OPTIONAL(flags, DIFlagField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- Result = GET_OR_DISTINCT(
- MDLocalVariable, (Context, tag.Val, scope.Val, name.Val, file.Val,
- line.Val, type.Val, arg.Val, flags.Val, inlinedAt.Val));
+ Result = GET_OR_DISTINCT(MDLocalVariable,
+ (Context, tag.Val, scope.Val, name.Val, file.Val,
+ line.Val, type.Val, arg.Val, flags.Val));
return false;
}
@@ -5130,10 +5160,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// If RetType is a non-function pointer type, then this is the short syntax
// for the call, which means that RetType is just the return type. Infer the
// rest of the function argument types from the arguments that are present.
- PointerType *PFTy = nullptr;
- FunctionType *Ty = nullptr;
- if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
- !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+ FunctionType *Ty = dyn_cast<FunctionType>(RetType);
+ if (!Ty) {
// Pull out the types of all of the arguments...
std::vector<Type*> ParamTypes;
for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
@@ -5143,12 +5171,12 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
return Error(RetTypeLoc, "Invalid result type for LLVM function");
Ty = FunctionType::get(RetType, ParamTypes, false);
- PFTy = PointerType::getUnqual(Ty);
}
// Look up the callee.
Value *Callee;
- if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
+ if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS))
+ return true;
// Set up the Attribute for the function.
SmallVector<AttributeSet, 8> Attrs;
@@ -5269,7 +5297,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Lex.Lex();
}
- Type *Ty = nullptr;
+ Type *Ty;
LocTy ExplicitTypeLoc = Lex.getLoc();
if (ParseType(Ty) ||
ParseToken(lltok::comma, "expected comma after load's type") ||
@@ -5278,8 +5306,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
ParseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
- if (!Val->getType()->isPointerTy() ||
- !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
+ if (!Val->getType()->isPointerTy() || !Ty->isFirstClassType())
return Error(Loc, "load operand must be a pointer to a first class type");
if (isAtomic && !Alignment)
return Error(Loc, "atomic load must have explicit non-zero alignment");
@@ -5290,7 +5317,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
return Error(ExplicitTypeLoc,
"explicit pointee type doesn't match operand's pointee type");
- Inst = new LoadInst(Val, "", isVolatile, Alignment, Ordering, Scope);
+ Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, Scope);
return AteExtraComma ? InstExtraComma : InstNormal;
}
@@ -5519,7 +5546,9 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
!BasePointerType->getElementType()->isSized(&Visited))
return Error(Loc, "base element of getelementptr must be sized");
- if (!GetElementPtrInst::getIndexedType(BaseType, Indices))
+ if (!GetElementPtrInst::getIndexedType(
+ cast<PointerType>(BaseType->getScalarType())->getElementType(),
+ Indices))
return Error(Loc, "invalid getelementptr indices");
Inst = GetElementPtrInst::Create(Ty, Ptr, Indices);
if (InBounds)
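ParseOptionalDerefAttrBytes folds the two attributes into a single routine keyed by the token kind; the grammar it accepts is just AttrKind '(' <bytes> ')', and parsing nothing at all is not an error. A rough stand-alone sketch of a parser with that shape (hypothetical names, plain strings instead of the LLLexer token stream):

#include <cassert>
#include <cctype>
#include <cstdint>
#include <string>

// Parse "<keyword>(<bytes>)" at the start of Text.  Returns true only when
// the keyword is present and well formed; an absent keyword is not an error.
static bool parseOptionalDerefBytes(const std::string &Text,
                                    const std::string &Keyword,
                                    uint64_t &Bytes, bool &Error) {
  Bytes = 0;
  Error = false;
  if (Text.compare(0, Keyword.size(), Keyword) != 0)
    return false;                                   // attribute not present
  size_t I = Keyword.size();
  if (I >= Text.size() || Text[I] != '(') { Error = true; return false; }
  ++I;
  if (I >= Text.size() || !std::isdigit((unsigned char)Text[I])) {
    Error = true;
    return false;
  }
  while (I < Text.size() && std::isdigit((unsigned char)Text[I]))
    Bytes = Bytes * 10 + (Text[I++] - '0');
  if (I >= Text.size() || Text[I] != ')') { Error = true; return false; }
  return true;
}

int main() {
  uint64_t B; bool Err;
  assert(parseOptionalDerefBytes("dereferenceable_or_null(8)",
                                 "dereferenceable_or_null", B, Err));
  assert(B == 8 && !Err);
  assert(!parseOptionalDerefBytes("noalias", "dereferenceable", B, Err) && !Err);
}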
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 5e92e57..117cdcb 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -223,7 +223,7 @@ namespace llvm {
bool ParseOptionalDLLStorageClass(unsigned &DLLStorageClass);
bool ParseOptionalCallingConv(unsigned &CC);
bool ParseOptionalAlignment(unsigned &Alignment);
- bool ParseOptionalDereferenceableBytes(uint64_t &Bytes);
+ bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes);
bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
AtomicOrdering &Ordering);
bool ParseOrdering(AtomicOrdering &Ordering);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index a7aa17c..2bdc53b 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -106,6 +106,7 @@ namespace lltok {
kw_inalloca,
kw_cold,
kw_dereferenceable,
+ kw_dereferenceable_or_null,
kw_inlinehint,
kw_inreg,
kw_jumptable,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 84753ff..5366f5f 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -16,6 +16,7 @@
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticPrinter.h"
@@ -218,6 +219,8 @@ class BitcodeReader : public GVMaterializer {
/// True if any Metadata block has been materialized.
bool IsMetadataMaterialized;
+ bool StripDebugInfo = false;
+
public:
std::error_code Error(BitcodeError E, const Twine &Message);
std::error_code Error(BitcodeError E);
@@ -227,7 +230,7 @@ public:
DiagnosticHandlerFunction DiagnosticHandler);
explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C,
DiagnosticHandlerFunction DiagnosticHandler);
- ~BitcodeReader() { FreeState(); }
+ ~BitcodeReader() override { FreeState(); }
std::error_code materializeForwardReferencedFunctions();
@@ -255,6 +258,8 @@ public:
/// Materialize any deferred Metadata block.
std::error_code materializeMetadata() override;
+ void setStripDebugInfo() override;
+
private:
std::vector<StructType *> IdentifiedStructTypes;
StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
@@ -1093,6 +1098,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) {
return Attribute::NonNull;
case bitc::ATTR_KIND_DEREFERENCEABLE:
return Attribute::Dereferenceable;
+ case bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL:
+ return Attribute::DereferenceableOrNull;
case bitc::ATTR_KIND_NO_RED_ZONE:
return Attribute::NoRedZone;
case bitc::ATTR_KIND_NO_RETURN:
@@ -1209,6 +1216,8 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() {
B.addStackAlignmentAttr(Record[++i]);
else if (Kind == Attribute::Dereferenceable)
B.addDereferenceableAttr(Record[++i]);
+ else if (Kind == Attribute::DereferenceableOrNull)
+ B.addDereferenceableOrNullAttr(Record[++i]);
} else { // String attribute
assert((Record[i] == 3 || Record[i] == 4) &&
"Invalid attribute group entry");
@@ -1906,7 +1915,8 @@ std::error_code BitcodeReader::ParseMetadata() {
break;
}
case bitc::METADATA_LOCAL_VAR: {
- if (Record.size() != 10)
+ // 10th field is for the obsoleted 'inlinedAt:' field.
+ if (Record.size() != 9 && Record.size() != 10)
return Error("Invalid record");
MDValueList.AssignValue(
@@ -1914,7 +1924,7 @@ std::error_code BitcodeReader::ParseMetadata() {
(Context, Record[1], getMDOrNull(Record[2]),
getMDString(Record[3]), getMDOrNull(Record[4]),
Record[5], getMDOrNull(Record[6]), Record[7],
- Record[8], getMDOrNull(Record[9]))),
+ Record[8])),
NextMDValueNo++);
break;
}
@@ -2308,14 +2318,17 @@ std::error_code BitcodeReader::ParseConstants() {
Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy));
}
- ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
- V = ConstantExpr::getGetElementPtr(Elts[0], Indices,
- BitCode ==
- bitc::CST_CODE_CE_INBOUNDS_GEP);
if (PointeeType &&
- PointeeType != cast<GEPOperator>(V)->getSourceElementType())
+ PointeeType !=
+ cast<SequentialType>(Elts[0]->getType()->getScalarType())
+ ->getElementType())
return Error("Explicit gep operator type does not match pointee type "
"of pointer operand");
+
+ ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
+ V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices,
+ BitCode ==
+ bitc::CST_CODE_CE_INBOUNDS_GEP);
break;
}
case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#]
@@ -2609,6 +2622,8 @@ std::error_code BitcodeReader::materializeMetadata() {
return std::error_code();
}
+void BitcodeReader::setStripDebugInfo() { StripDebugInfo = true; }
+
/// RememberAndSkipFunctionBody - When we see the block for a function body,
/// remember where it is and then skip it. This lets us lazily deserialize the
/// functions.
@@ -3053,8 +3068,12 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M,
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
while (1) {
- if (Stream.AtEndOfStream())
- return std::error_code();
+ if (Stream.AtEndOfStream()) {
+ if (TheModule)
+ return std::error_code();
+ // We didn't really read a proper Module.
+ return Error("Malformed IR file");
+ }
BitstreamEntry Entry =
Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
@@ -4305,6 +4324,9 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) {
return EC;
F->setIsMaterializable(false);
+ if (StripDebugInfo)
+ stripDebugInfo(*F);
+
// Upgrade any old intrinsic calls in the function.
for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(),
E = UpgradedIntrinsics.end(); I != E; ++I) {
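The METADATA_LOCAL_VAR change above makes the reader accept records of either 9 or 10 fields, silently dropping the obsoleted trailing inlinedAt operand so that older bitcode still loads. A small sketch of that size-tolerant record check; the field positions follow the indices used in the hunk (1 = tag, 5 = line, 7 = arg, 8 = flags), and everything else here is invented for illustration:

#include <cassert>
#include <cstdint>
#include <vector>

struct LocalVar { uint64_t Tag, Line, Arg, Flags; };

// Accept the new 9-field layout and the old 10-field layout; the obsolete
// trailing field (formerly inlinedAt) is simply ignored.
static bool parseLocalVarRecord(const std::vector<uint64_t> &Record,
                                LocalVar &Out) {
  if (Record.size() != 9 && Record.size() != 10)
    return false;                                  // malformed record
  Out.Tag = Record[1];
  Out.Line = Record[5];
  Out.Arg = Record[7];
  Out.Flags = Record[8];
  return true;
}

int main() {
  std::vector<uint64_t> NewForm(9, 0), OldForm(10, 0), Bad(7, 0);
  LocalVar V;
  assert(parseLocalVarRecord(NewForm, V));
  assert(parseLocalVarRecord(OldForm, V));   // extra trailing field tolerated
  assert(!parseLocalVarRecord(Bad, V));
}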
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 0123fb2..aa4a6a4 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -200,6 +200,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_NON_NULL;
case Attribute::Dereferenceable:
return bitc::ATTR_KIND_DEREFERENCEABLE;
+ case Attribute::DereferenceableOrNull:
+ return bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL;
case Attribute::NoRedZone:
return bitc::ATTR_KIND_NO_RED_ZONE;
case Attribute::NoReturn:
@@ -821,7 +823,7 @@ static void WriteMDSubrange(const MDSubrange *N, const ValueEnumerator &,
unsigned Abbrev) {
Record.push_back(N->isDistinct());
Record.push_back(N->getCount());
- Record.push_back(rotateSign(N->getLo()));
+ Record.push_back(rotateSign(N->getLowerBound()));
Stream.EmitRecord(bitc::METADATA_SUBRANGE, Record, Abbrev);
Record.clear();
@@ -892,10 +894,10 @@ static void WriteMDCompositeType(const MDCompositeType *N,
Record.push_back(N->getAlignInBits());
Record.push_back(N->getOffsetInBits());
Record.push_back(N->getFlags());
- Record.push_back(VE.getMetadataOrNullID(N->getElements()));
+ Record.push_back(VE.getMetadataOrNullID(N->getElements().get()));
Record.push_back(N->getRuntimeLang());
Record.push_back(VE.getMetadataOrNullID(N->getVTableHolder()));
- Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams()));
+ Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get()));
Record.push_back(VE.getMetadataOrNullID(N->getRawIdentifier()));
Stream.EmitRecord(bitc::METADATA_COMPOSITE_TYPE, Record, Abbrev);
@@ -909,7 +911,7 @@ static void WriteMDSubroutineType(const MDSubroutineType *N,
unsigned Abbrev) {
Record.push_back(N->isDistinct());
Record.push_back(N->getFlags());
- Record.push_back(VE.getMetadataOrNullID(N->getTypeArray()));
+ Record.push_back(VE.getMetadataOrNullID(N->getTypeArray().get()));
Stream.EmitRecord(bitc::METADATA_SUBROUTINE_TYPE, Record, Abbrev);
Record.clear();
@@ -940,11 +942,11 @@ static void WriteMDCompileUnit(const MDCompileUnit *N,
Record.push_back(N->getRuntimeVersion());
Record.push_back(VE.getMetadataOrNullID(N->getRawSplitDebugFilename()));
Record.push_back(N->getEmissionKind());
- Record.push_back(VE.getMetadataOrNullID(N->getEnumTypes()));
- Record.push_back(VE.getMetadataOrNullID(N->getRetainedTypes()));
- Record.push_back(VE.getMetadataOrNullID(N->getSubprograms()));
- Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables()));
- Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities()));
+ Record.push_back(VE.getMetadataOrNullID(N->getEnumTypes().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRetainedTypes().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getSubprograms().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get()));
Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev);
Record.clear();
@@ -970,10 +972,10 @@ static void WriteMDSubprogram(const MDSubprogram *N,
Record.push_back(N->getVirtualIndex());
Record.push_back(N->getFlags());
Record.push_back(N->isOptimized());
- Record.push_back(VE.getMetadataOrNullID(N->getFunction()));
- Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawFunction()));
+ Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get()));
Record.push_back(VE.getMetadataOrNullID(N->getDeclaration()));
- Record.push_back(VE.getMetadataOrNullID(N->getVariables()));
+ Record.push_back(VE.getMetadataOrNullID(N->getVariables().get()));
Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev);
Record.clear();
@@ -1064,7 +1066,7 @@ static void WriteMDGlobalVariable(const MDGlobalVariable *N,
Record.push_back(VE.getMetadataOrNullID(N->getType()));
Record.push_back(N->isLocalToUnit());
Record.push_back(N->isDefinition());
- Record.push_back(VE.getMetadataOrNullID(N->getVariable()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawVariable()));
Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration()));
Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev);
@@ -1085,7 +1087,6 @@ static void WriteMDLocalVariable(const MDLocalVariable *N,
Record.push_back(VE.getMetadataOrNullID(N->getType()));
Record.push_back(N->getArg());
Record.push_back(N->getFlags());
- Record.push_back(VE.getMetadataOrNullID(N->getInlinedAt()));
Stream.EmitRecord(bitc::METADATA_LOCAL_VAR, Record, Abbrev);
Record.clear();
@@ -2047,6 +2048,9 @@ static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order,
static void WriteUseListBlock(const Function *F, ValueEnumerator &VE,
BitstreamWriter &Stream) {
+ assert(VE.shouldPreserveUseListOrder() &&
+ "Expected to be preserving use-list order");
+
auto hasMore = [&]() {
return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F;
};
@@ -2089,7 +2093,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
bool NeedsMetadataAttachment = false;
- DebugLoc LastDL;
+ MDLocation *LastDL = nullptr;
// Finally, emit all the instructions, in order.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -2104,26 +2108,22 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
// If the instruction has a debug location, emit it.
- DebugLoc DL = I->getDebugLoc();
- if (DL.isUnknown()) {
- // nothing todo.
- } else if (DL == LastDL) {
+ MDLocation *DL = I->getDebugLoc();
+ if (!DL)
+ continue;
+
+ if (DL == LastDL) {
// Just repeat the same debug loc as last time.
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals);
- } else {
- MDNode *Scope, *IA;
- DL.getScopeAndInlinedAt(Scope, IA, I->getContext());
- assert(Scope && "Expected valid scope");
-
- Vals.push_back(DL.getLine());
- Vals.push_back(DL.getCol());
- Vals.push_back(VE.getMetadataOrNullID(Scope));
- Vals.push_back(VE.getMetadataOrNullID(IA));
- Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
- Vals.clear();
-
- LastDL = DL;
+ continue;
}
+
+ Vals.push_back(DL->getLine());
+ Vals.push_back(DL->getColumn());
+ Vals.push_back(VE.getMetadataOrNullID(DL->getScope()));
+ Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt()));
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
+ Vals.clear();
}
// Emit names for all the instructions etc.
@@ -2131,7 +2131,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
if (NeedsMetadataAttachment)
WriteMetadataAttachment(F, VE, Stream);
- if (shouldPreserveBitcodeUseListOrder())
+ if (VE.shouldPreserveUseListOrder())
WriteUseListBlock(&F, VE, Stream);
VE.purgeFunction();
Stream.ExitBlock();
@@ -2313,7 +2313,8 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
}
/// WriteModule - Emit the specified module to the bitstream.
-static void WriteModule(const Module *M, BitstreamWriter &Stream) {
+static void WriteModule(const Module *M, BitstreamWriter &Stream,
+ bool ShouldPreserveUseListOrder) {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
SmallVector<unsigned, 1> Vals;
@@ -2322,7 +2323,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
// Analyze the module, enumerating globals, functions, etc.
- ValueEnumerator VE(*M);
+ ValueEnumerator VE(*M, ShouldPreserveUseListOrder);
// Emit blockinfo, which defines the standard abbreviations etc.
WriteBlockInfo(VE, Stream);
@@ -2355,7 +2356,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
// Emit module-level use-lists.
- if (shouldPreserveBitcodeUseListOrder())
+ if (VE.shouldPreserveUseListOrder())
WriteUseListBlock(nullptr, VE, Stream);
// Emit function bodies.
@@ -2441,7 +2442,8 @@ static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer,
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
-void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
+void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
+ bool ShouldPreserveUseListOrder) {
SmallVector<char, 0> Buffer;
Buffer.reserve(256*1024);
@@ -2464,7 +2466,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
Stream.Emit(0xD, 4);
// Emit the module.
- WriteModule(M, Stream);
+ WriteModule(M, Stream, ShouldPreserveUseListOrder);
}
if (TT.isOSDarwin())
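The debug-location emission rewrite above keeps the last MDLocation pointer and, when an instruction repeats it, emits the compact FUNC_CODE_DEBUG_LOC_AGAIN record instead of re-encoding line, column, scope, and inlinedAt. A toy model of that caching, with made-up costs just to show the saving:

#include <cassert>

// Pretend a full location record costs 4 words and an "again" record costs 1.
struct LocEmitter {
  const void *LastLoc = nullptr;
  unsigned WordsEmitted = 0;

  void emit(const void *Loc) {
    if (!Loc)
      return;                 // instruction has no debug location
    if (Loc == LastLoc) {
      WordsEmitted += 1;      // DEBUG_LOC_AGAIN-style record
      return;
    }
    WordsEmitted += 4;        // line, column, scope, inlinedAt
    LastLoc = Loc;
  }
};

int main() {
  int A, B;                   // stand-ins for two distinct locations
  LocEmitter E;
  E.emit(&A);
  E.emit(&A);                 // repeated location: cheap record
  E.emit(&B);
  assert(E.WordsEmitted == 4 + 1 + 4);
}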
diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 25456a4..3165743 100644
--- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -19,22 +19,25 @@
using namespace llvm;
PreservedAnalyses BitcodeWriterPass::run(Module &M) {
- WriteBitcodeToFile(&M, OS);
+ WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder);
return PreservedAnalyses::all();
}
namespace {
class WriteBitcodePass : public ModulePass {
raw_ostream &OS; // raw_ostream to print on
+ bool ShouldPreserveUseListOrder;
+
public:
static char ID; // Pass identification, replacement for typeid
- explicit WriteBitcodePass(raw_ostream &o)
- : ModulePass(ID), OS(o) {}
+ explicit WriteBitcodePass(raw_ostream &o, bool ShouldPreserveUseListOrder)
+ : ModulePass(ID), OS(o),
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {}
const char *getPassName() const override { return "Bitcode Writer"; }
bool runOnModule(Module &M) override {
- WriteBitcodeToFile(&M, OS);
+ WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder);
return false;
}
};
@@ -42,6 +45,7 @@ namespace {
char WriteBitcodePass::ID = 0;
-ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str) {
- return new WriteBitcodePass(Str);
+ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str,
+ bool ShouldPreserveUseListOrder) {
+ return new WriteBitcodePass(Str, ShouldPreserveUseListOrder);
}
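Rather than consulting the global shouldPreserveBitcodeUseListOrder() flag, the writer path now carries ShouldPreserveUseListOrder explicitly from createBitcodeWriterPass and WriteBitcodeToFile down into the ValueEnumerator, so each caller decides per module. A bare sketch of that constructor-injection style (generic names, not the LLVM classes):

#include <cassert>

class Writer {
  bool PreserveUseListOrder;
public:
  explicit Writer(bool PreserveUseListOrder)
      : PreserveUseListOrder(PreserveUseListOrder) {}
  bool shouldPreserveUseListOrder() const { return PreserveUseListOrder; }
};

int main() {
  Writer Default(false), Roundtrip(true);   // two writers, two policies
  assert(!Default.shouldPreserveUseListOrder());
  assert(Roundtrip.shouldPreserveUseListOrder());
}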
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 549e94f..7f576d7 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -283,9 +283,11 @@ static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
return V.first->getType()->isIntOrIntVectorTy();
}
-ValueEnumerator::ValueEnumerator(const Module &M)
- : HasMDString(false), HasMDLocation(false), HasGenericDebugNode(false) {
- if (shouldPreserveBitcodeUseListOrder())
+ValueEnumerator::ValueEnumerator(const Module &M,
+ bool ShouldPreserveUseListOrder)
+ : HasMDString(false), HasMDLocation(false), HasGenericDebugNode(false),
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
+ if (ShouldPreserveUseListOrder)
UseListOrders = predictUseListOrder(M);
// Enumerate the global variables.
@@ -373,12 +375,10 @@ ValueEnumerator::ValueEnumerator(const Module &M)
for (unsigned i = 0, e = MDs.size(); i != e; ++i)
EnumerateMetadata(MDs[i].second);
- if (!I.getDebugLoc().isUnknown()) {
- MDNode *Scope, *IA;
- I.getDebugLoc().getScopeAndInlinedAt(Scope, IA, I.getContext());
- if (Scope) EnumerateMetadata(Scope);
- if (IA) EnumerateMetadata(IA);
- }
+ // Don't enumerate the location directly -- it has a special record
+ // type -- but enumerate its operands.
+ if (MDLocation *L = I.getDebugLoc())
+ EnumerateMDNodeOperands(L);
}
}
@@ -463,7 +463,7 @@ void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map,
void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
- if (shouldPreserveBitcodeUseListOrder())
+ if (ShouldPreserveUseListOrder)
// Optimizing constants makes the use-list order difficult to predict.
// Disable it for now when trying to preserve the order.
return;
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index b94c370..ba245a3 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -67,6 +67,7 @@ private:
bool HasMDString;
bool HasMDLocation;
bool HasGenericDebugNode;
+ bool ShouldPreserveUseListOrder;
typedef DenseMap<AttributeSet, unsigned> AttributeGroupMapType;
AttributeGroupMapType AttributeGroupMap;
@@ -102,7 +103,7 @@ private:
ValueEnumerator(const ValueEnumerator &) = delete;
void operator=(const ValueEnumerator &) = delete;
public:
- ValueEnumerator(const Module &M);
+ ValueEnumerator(const Module &M, bool ShouldPreserveUseListOrder);
void dump() const;
void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const;
@@ -123,6 +124,8 @@ public:
bool hasMDLocation() const { return HasMDLocation; }
bool hasGenericDebugNode() const { return HasGenericDebugNode; }
+ bool shouldPreserveUseListOrder() const { return ShouldPreserveUseListOrder; }
+
unsigned getTypeID(Type *T) const {
TypeMapType::const_iterator I = TypeMap.find(T);
assert(I != TypeMap.end() && "Type not in ValueEnumerator!");
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 12cf95b..f9544dd 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -127,7 +127,7 @@ class RegisterClassInfo;
AggressiveAntiDepBreaker(MachineFunction& MFi,
const RegisterClassInfo &RCI,
TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
- ~AggressiveAntiDepBreaker();
+ ~AggressiveAntiDepBreaker() override;
/// Initialize anti-dep breaking for a new basic block.
void StartBlock(MachineBasicBlock *BB) override;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 07d6731..43d7a38 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -28,7 +28,7 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Mangler.h"
@@ -671,17 +671,17 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << "DEBUG_VALUE: ";
DIVariable V = MI->getDebugVariable();
- if (V.getContext().isSubprogram()) {
- StringRef Name = DISubprogram(V.getContext()).getDisplayName();
+ if (auto *SP = dyn_cast<MDSubprogram>(V->getScope())) {
+ StringRef Name = SP->getDisplayName();
if (!Name.empty())
OS << Name << ":";
}
- OS << V.getName();
+ OS << V->getName();
DIExpression Expr = MI->getDebugExpression();
- if (Expr.isBitPiece())
- OS << " [bit_piece offset=" << Expr.getBitPieceOffset()
- << " size=" << Expr.getBitPieceSize() << "]";
+ if (Expr->isBitPiece())
+ OS << " [bit_piece offset=" << Expr->getBitPieceOffset()
+ << " size=" << Expr->getBitPieceSize() << "]";
OS << " <- ";
// The second operand is only an offset if it's an immediate.
@@ -1034,11 +1034,31 @@ bool AsmPrinter::doFinalization(Module &M) {
EmitVisibility(Name, V, false);
}
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
// Emit module flags.
SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
M.getModuleFlagsMetadata(ModuleFlags);
if (!ModuleFlags.empty())
- getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, *Mang, TM);
+ TLOF.emitModuleFlags(OutStreamer, ModuleFlags, *Mang, TM);
+
+ Triple TT(TM.getTargetTriple());
+ if (TT.isOSBinFormatELF()) {
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOF.getDataRelSection());
+ const DataLayout *DL = TM.getDataLayout();
+
+ for (const auto &Stub : Stubs) {
+ OutStreamer.EmitLabel(Stub.first);
+ OutStreamer.EmitSymbolValue(Stub.second.getPointer(),
+ DL->getPointerSize());
+ }
+ }
+ }
// Make sure we wrote out everything we need.
OutStreamer.Flush();
@@ -2302,7 +2322,7 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
SmallString<60> NameStr;
Mang->getNameWithPrefix(NameStr, Sym);
- return OutContext.GetOrCreateSymbol(NameStr.str());
+ return OutContext.GetOrCreateSymbol(NameStr);
}
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index bbdf237..1e3c5d7 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -33,7 +33,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) {
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
}
-void DbgValueHistoryMap::startInstrRange(const MDNode *Var,
+void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,
const MachineInstr &MI) {
// Instruction range should start with a DBG_VALUE instruction for the
// variable.
@@ -48,7 +48,7 @@ void DbgValueHistoryMap::startInstrRange(const MDNode *Var,
Ranges.push_back(std::make_pair(&MI, nullptr));
}
-void DbgValueHistoryMap::endInstrRange(const MDNode *Var,
+void DbgValueHistoryMap::endInstrRange(InlinedVariable Var,
const MachineInstr &MI) {
auto &Ranges = VarInstrRanges[Var];
// Verify that the current instruction range is not yet closed.
@@ -59,7 +59,7 @@ void DbgValueHistoryMap::endInstrRange(const MDNode *Var,
Ranges.back().second = &MI;
}
-unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const {
+unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const {
const auto &I = VarInstrRanges.find(Var);
if (I == VarInstrRanges.end())
return 0;
@@ -71,12 +71,13 @@ unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const {
namespace {
// Maps physreg numbers to the variables they describe.
-typedef std::map<unsigned, SmallVector<const MDNode *, 1>> RegDescribedVarsMap;
+typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+typedef std::map<unsigned, SmallVector<InlinedVariable, 1>> RegDescribedVarsMap;
}
// \brief Claim that @Var is not described by @RegNo anymore.
-static void dropRegDescribedVar(RegDescribedVarsMap &RegVars,
- unsigned RegNo, const MDNode *Var) {
+static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
+ InlinedVariable Var) {
const auto &I = RegVars.find(RegNo);
assert(RegNo != 0U && I != RegVars.end());
auto &VarSet = I->second;
@@ -89,8 +90,8 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars,
}
// \brief Claim that @Var is now described by @RegNo.
-static void addRegDescribedVar(RegDescribedVarsMap &RegVars,
- unsigned RegNo, const MDNode *Var) {
+static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
+ InlinedVariable Var) {
assert(RegNo != 0U);
auto &VarSet = RegVars[RegNo];
assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end());
@@ -203,7 +204,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// Use the base variable (without any DW_OP_piece expressions)
// as index into History. The full variables including the
// piece expressions are attached to the MI.
- DIVariable Var = MI.getDebugVariable();
+ MDLocalVariable *RawVar = MI.getDebugVariable();
+ assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt());
if (unsigned PrevReg = Result.getRegisterForVar(Var))
dropRegDescribedVar(RegVars, PrevReg, Var);
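With the change above, the value-history map is keyed by an InlinedVariable, the pair (MDLocalVariable *, MDLocation *), rather than by the variable node alone, so two inlined instances of the same source variable get separate ranges. A small illustration of why the pair key matters, using plain pointers in place of the metadata nodes:

#include <cassert>
#include <map>
#include <utility>
#include <vector>

using Variable = const char *;   // stand-in for MDLocalVariable *
using Location = const char *;   // stand-in for MDLocation * (the inlined-at)
using InlinedVariable = std::pair<Variable, Location>;

int main() {
  std::map<InlinedVariable, std::vector<int>> Ranges;
  Variable X = "x";
  Location CallA = "inlined-at A", CallB = "inlined-at B";

  Ranges[{X, CallA}].push_back(1);   // "x" from the first inlined call site
  Ranges[{X, CallB}].push_back(2);   // "x" from the second inlined call site

  assert(Ranges.size() == 2 && "same variable, two distinct inline instances");
}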
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
index 4b62007..c25aaff 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -17,7 +17,8 @@ namespace llvm {
class MachineFunction;
class MachineInstr;
-class MDNode;
+class MDLocalVariable;
+class MDLocation;
class TargetRegisterInfo;
// For each user variable, keep a list of instruction ranges where this variable
@@ -31,16 +32,19 @@ class DbgValueHistoryMap {
public:
typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange;
typedef SmallVector<InstrRange, 4> InstrRanges;
- typedef MapVector<const MDNode *, InstrRanges> InstrRangesMap;
+ typedef std::pair<const MDLocalVariable *, const MDLocation *>
+ InlinedVariable;
+ typedef MapVector<InlinedVariable, InstrRanges> InstrRangesMap;
+
private:
InstrRangesMap VarInstrRanges;
public:
- void startInstrRange(const MDNode *Var, const MachineInstr &MI);
- void endInstrRange(const MDNode *Var, const MachineInstr &MI);
+ void startInstrRange(InlinedVariable Var, const MachineInstr &MI);
+ void endInstrRange(InlinedVariable Var, const MachineInstr &MI);
// Returns register currently describing @Var. If @Var is currently
// inaccessible or is not described by a register, returns 0.
- unsigned getRegisterForVar(const MDNode *Var) const;
+ unsigned getRegisterForVar(InlinedVariable Var) const;
bool empty() const { return VarInstrRanges.empty(); }
void clear() { VarInstrRanges.clear(); }
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 6914bbe..4f6714e 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -42,8 +42,8 @@ public:
}
Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc)
: Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) {
- assert(DIVariable(Var).Verify());
- assert(DIExpression(Expr)->isValid());
+ assert(isa<MDLocalVariable>(Var));
+ assert(cast<MDExpression>(Expr)->isValid());
}
/// The variable to which this location entry corresponds.
@@ -74,10 +74,11 @@ public:
const ConstantFP *getConstantFP() const { return Constant.CFP; }
const ConstantInt *getConstantInt() const { return Constant.CIP; }
MachineLocation getLoc() const { return Loc; }
- const MDNode *getVariableNode() const { return Variable; }
- DIVariable getVariable() const { return DIVariable(Variable); }
- bool isBitPiece() const { return getExpression().isBitPiece(); }
- DIExpression getExpression() const { return DIExpression(Expression); }
+ DIVariable getVariable() const { return cast<MDLocalVariable>(Variable); }
+ bool isBitPiece() const { return getExpression()->isBitPiece(); }
+ DIExpression getExpression() const {
+ return cast_or_null<MDExpression>(Expression);
+ }
friend bool operator==(const Value &, const Value &);
friend bool operator<(const Value &, const Value &);
};
@@ -101,12 +102,12 @@ public:
/// Return true if the merge was successful.
bool MergeValues(const DebugLocEntry &Next) {
if (Begin == Next.Begin) {
- DIExpression Expr(Values[0].Expression);
- DIVariable Var(Values[0].Variable);
- DIExpression NextExpr(Next.Values[0].Expression);
- DIVariable NextVar(Next.Values[0].Variable);
- if (Var == NextVar && Expr.isBitPiece() &&
- NextExpr.isBitPiece()) {
+ DIExpression Expr = cast_or_null<MDExpression>(Values[0].Expression);
+ DIVariable Var = cast_or_null<MDLocalVariable>(Values[0].Variable);
+ DIExpression NextExpr =
+ cast_or_null<MDExpression>(Next.Values[0].Expression);
+ DIVariable NextVar = cast_or_null<MDLocalVariable>(Next.Values[0].Variable);
+ if (Var == NextVar && Expr->isBitPiece() && NextExpr->isBitPiece()) {
addValues(Next.Values);
End = Next.End;
return true;
@@ -189,8 +190,8 @@ inline bool operator==(const DebugLocEntry::Value &A,
/// \brief Compare two pieces based on their offset.
inline bool operator<(const DebugLocEntry::Value &A,
const DebugLocEntry::Value &B) {
- return A.getExpression().getBitPieceOffset() <
- B.getExpression().getBitPieceOffset();
+ return A.getExpression()->getBitPieceOffset() <
+ B.getExpression()->getBitPieceOffset();
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index eee5fc5..75d3b68 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -101,51 +101,52 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) {
if (DIE *Die = getDIE(GV))
return Die;
- assert(GV.isGlobalVariable());
+ assert(GV);
- DIScope GVContext = GV.getContext();
- DIType GTy = DD->resolve(GV.getType());
+ DIScope GVContext = GV->getScope();
+ DIType GTy = DD->resolve(GV->getType());
// Construct the context before querying for the existence of the DIE in
// case such construction creates the DIE.
DIE *ContextDIE = getOrCreateContextDIE(GVContext);
// Add to map.
- DIE *VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV);
+ DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV);
DIScope DeclContext;
- if (DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration()) {
- DeclContext = resolve(SDMDecl.getContext());
- assert(SDMDecl.isStaticMember() && "Expected static member decl");
- assert(GV.isDefinition());
+ if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) {
+ DeclContext = resolve(SDMDecl->getScope());
+ assert(SDMDecl->isStaticMember() && "Expected static member decl");
+ assert(GV->isDefinition());
// We need the declaration DIE that is in the static member's class.
DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl);
addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
} else {
- DeclContext = GV.getContext();
+ DeclContext = GV->getScope();
// Add name and type.
- addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
+ addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
addType(*VariableDIE, GTy);
// Add scoping info.
- if (!GV.isLocalToUnit())
+ if (!GV->isLocalToUnit())
addFlag(*VariableDIE, dwarf::DW_AT_external);
// Add line number info.
addSourceLine(*VariableDIE, GV);
}
- if (!GV.isDefinition())
+ if (!GV->isDefinition())
addFlag(*VariableDIE, dwarf::DW_AT_declaration);
+ else
+ addGlobalName(GV->getName(), *VariableDIE, DeclContext);
// Add location.
bool addToAccelTable = false;
- bool isGlobalVariable = GV.getGlobal() != nullptr;
- if (isGlobalVariable) {
+ if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) {
addToAccelTable = true;
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
- const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal());
- if (GV.getGlobal()->isThreadLocal()) {
+ const MCSymbol *Sym = Asm->getSymbol(Global);
+ if (Global->isThreadLocal()) {
// FIXME: Make this work with -gsplit-dwarf.
unsigned PointerSize = Asm->getDataLayout().getPointerSize();
assert((PointerSize == 4 || PointerSize == 8) &&
@@ -174,11 +175,11 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) {
}
addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
- addLinkageName(*VariableDIE, GV.getLinkageName());
+ addLinkageName(*VariableDIE, GV->getLinkageName());
} else if (const ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
+ dyn_cast_or_null<ConstantInt>(GV->getVariable())) {
addConstantValue(*VariableDIE, CI, GTy);
- } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV.getConstant())) {
+ } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) {
addToAccelTable = true;
// GV is a merged global.
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
@@ -195,15 +196,14 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) {
}
if (addToAccelTable) {
- DD->addAccelName(GV.getName(), *VariableDIE);
+ DD->addAccelName(GV->getName(), *VariableDIE);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
- if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
- DD->addAccelName(GV.getLinkageName(), *VariableDIE);
+ if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName())
+ DD->addAccelName(GV->getLinkageName(), *VariableDIE);
}
- addGlobalName(GV.getName(), *VariableDIE, DeclContext);
return VariableDIE;
}
@@ -307,7 +307,7 @@ void DwarfCompileUnit::constructScopeDIE(
DIScope DS(Scope->getScopeNode());
- assert((Scope->getInlinedAt() || !DS.isSubprogram()) &&
+ assert((Scope->getInlinedAt() || !isa<MDSubprogram>(DS)) &&
"Only handle inlined subprograms here, use "
"constructSubprogramScopeDIE for non-inlined "
"subprograms");
@@ -318,7 +318,7 @@ void DwarfCompileUnit::constructScopeDIE(
// avoid creating un-used children then removing them later when we find out
// the scope DIE is null.
std::unique_ptr<DIE> ScopeDIE;
- if (Scope->getParent() && DS.isSubprogram()) {
+ if (Scope->getParent() && isa<MDSubprogram>(DS)) {
ScopeDIE = constructInlinedScopeDIE(Scope);
if (!ScopeDIE)
return;
@@ -340,7 +340,7 @@ void DwarfCompileUnit::constructScopeDIE(
// There is no need to emit empty lexical block DIE.
for (const auto &E : DD->findImportedEntitiesForScope(DS))
Children.push_back(
- constructImportedEntityDIE(DIImportedEntity(E.second)));
+ constructImportedEntityDIE(cast<MDImportedEntity>(E.second)));
}
// If there are only other scopes as children, put them directly in the
@@ -431,10 +431,10 @@ DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
// Add the call site information to the DIE.
- DILocation DL(Scope->getInlinedAt());
+ const MDLocation *IA = Scope->getInlinedAt();
addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
- getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
- addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber());
+ getOrCreateSourceID(IA->getFilename(), IA->getDirectory()));
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
@@ -523,7 +523,7 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
assert(Expr != DV.getExpression().end() &&
"Wrong number of expressions");
DwarfExpr.AddMachineRegIndirect(FrameReg, Offset);
- DwarfExpr.AddExpression(Expr->begin(), Expr->end());
+ DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end());
++Expr;
}
addBlock(*VariableDie, dwarf::DW_AT_location, Loc);
@@ -562,16 +562,14 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) {
assert(Scope && Scope->getScopeNode());
assert(!Scope->getInlinedAt());
assert(!Scope->isAbstractScope());
- DISubprogram Sub(Scope->getScopeNode());
-
- assert(Sub.isSubprogram());
+ DISubprogram Sub = cast<MDSubprogram>(Scope->getScopeNode());
DD->getProcessedSPNodes().insert(Sub);
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
// If this is a variadic function, add an unspecified parameter.
- DITypeArray FnArgs = Sub.getType().getTypeArray();
+ DITypeArray FnArgs = Sub->getType()->getTypeArray();
// Collect lexical scope children first.
// ObjectPointer might be a local (non-argument) local variable if it's a
@@ -582,8 +580,7 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) {
// If we have a single element of null, it is a function that returns void.
// If we have more than one elements and the last one is null, it is a
// variadic function.
- if (FnArgs.getNumElements() > 1 &&
- !FnArgs.getElement(FnArgs.getNumElements() - 1) &&
+ if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] &&
!includeMinimalInlineScopes())
ScopeDIE.addChild(make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters));
}
@@ -607,7 +604,7 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
if (AbsDef)
return;
- DISubprogram SP(Scope->getScopeNode());
+ DISubprogram SP = cast<MDSubprogram>(Scope->getScopeNode());
DIE *ContextDIE;
@@ -617,11 +614,11 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
// the important distinction that the DIDescriptor is not associated with the
// DIE (since the DIDescriptor will be associated with the concrete DIE, if
// any). It could be refactored to some common utility function.
- else if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
+ else if (auto *SPDecl = SP->getDeclaration()) {
ContextDIE = &getUnitDie();
getOrCreateSubprogramDIE(SPDecl);
} else
- ContextDIE = getOrCreateContextDIE(resolve(SP.getContext()));
+ ContextDIE = getOrCreateContextDIE(resolve(SP->getScope()));
// Passing null as the associated DIDescriptor because the abstract definition
// shouldn't be found by lookup.
@@ -637,28 +634,25 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
std::unique_ptr<DIE>
DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity &Module) {
- assert(Module.Verify() &&
- "Use one of the MDNode * overloads to handle invalid metadata");
- std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module.getTag());
+ std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module->getTag());
insertDIE(Module, IMDie.get());
DIE *EntityDie;
- DIDescriptor Entity = resolve(Module.getEntity());
- if (Entity.isNameSpace())
- EntityDie = getOrCreateNameSpace(DINameSpace(Entity));
- else if (Entity.isSubprogram())
- EntityDie = getOrCreateSubprogramDIE(DISubprogram(Entity));
- else if (Entity.isType())
- EntityDie = getOrCreateTypeDIE(DIType(Entity));
- else if (Entity.isGlobalVariable())
- EntityDie = getOrCreateGlobalVariableDIE(DIGlobalVariable(Entity));
+ auto *Entity = resolve(Module->getEntity());
+ if (auto *NS = dyn_cast<MDNamespace>(Entity))
+ EntityDie = getOrCreateNameSpace(NS);
+ else if (auto *SP = dyn_cast<MDSubprogram>(Entity))
+ EntityDie = getOrCreateSubprogramDIE(SP);
+ else if (auto *T = dyn_cast<MDType>(Entity))
+ EntityDie = getOrCreateTypeDIE(T);
+ else if (auto *GV = dyn_cast<MDGlobalVariable>(Entity))
+ EntityDie = getOrCreateGlobalVariableDIE(GV);
else
EntityDie = getDIE(Entity);
assert(EntityDie);
- addSourceLine(*IMDie, Module.getLineNumber(),
- Module.getContext().getFilename(),
- Module.getContext().getDirectory());
+ addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(),
+ Module->getScope()->getDirectory());
addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
- StringRef Name = Module.getName();
+ StringRef Name = Module->getName();
if (!Name.empty())
addString(*IMDie, dwarf::DW_AT_name, Name);
@@ -683,21 +677,19 @@ void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) {
}
}
void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) {
- assert(SP.isSubprogram() && "CU's subprogram list contains a non-subprogram");
- assert(SP.isDefinition() &&
+ assert(SP && "CU's subprogram list contains a non-subprogram");
+ assert(SP->isDefinition() &&
"CU's subprogram list contains a subprogram declaration");
- DIArray Variables = SP.getVariables();
- if (Variables.getNumElements() == 0)
+ auto Variables = SP->getVariables();
+ if (Variables.size() == 0)
return;
DIE *SPDIE = DU->getAbstractSPDies().lookup(SP);
if (!SPDIE)
SPDIE = getDIE(SP);
assert(SPDIE);
- for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
- DIVariable DV(Variables.getElement(vi));
- assert(DV.isVariable());
- DbgVariable NewVar(DV, DIExpression(), DD);
+ for (DIVariable DV : Variables) {
+ DbgVariable NewVar(DV, nullptr, DIExpression(), DD);
auto VariableDie = constructVariableDIE(NewVar);
applyVariableAttributes(NewVar, *VariableDie);
SPDIE->addChild(std::move(VariableDie));
@@ -728,7 +720,7 @@ void DwarfCompileUnit::addGlobalType(DIType Ty, const DIE &Die,
DIScope Context) {
if (includeMinimalInlineScopes())
return;
- std::string FullName = getParentContextString(Context) + Ty.getName().str();
+ std::string FullName = getParentContextString(Context) + Ty->getName().str();
GlobalTypes[FullName] = &Die;
}
@@ -778,7 +770,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
ValidReg = DwarfExpr.AddMachineRegIndirect(Location.getReg(),
Location.getOffset());
if (ValidReg)
- DwarfExpr.AddExpression(Expr.begin(), Expr.end());
+ DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end());
} else
ValidReg = DwarfExpr.AddMachineRegExpression(Expr, Location.getReg());
@@ -816,10 +808,10 @@ void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
void DwarfCompileUnit::applySubprogramAttributesToDefinition(DISubprogram SP,
DIE &SPDie) {
- DISubprogram SPDecl = SP.getFunctionDeclaration();
- DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext());
+ auto *SPDecl = SP->getDeclaration();
+ DIScope Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope());
applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
- addGlobalName(SP.getName(), SPDie, Context);
+ addGlobalName(SP->getName(), SPDie, Context);
}
bool DwarfCompileUnit::isDwoUnit() const {
@@ -827,7 +819,7 @@ bool DwarfCompileUnit::isDwoUnit() const {
}
bool DwarfCompileUnit::includeMinimalInlineScopes() const {
- return getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly ||
+ return getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
}
} // end llvm namespace
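
The DwarfCompileUnit.cpp changes above consistently swap flag-style queries on DIDescriptor wrappers (isNameSpace(), isSubprogram(), ...) for class-based dispatch with dyn_cast<> over the new metadata node classes, as in constructImportedEntityDIE. The stand-alone sketch below mirrors that dispatch shape with hypothetical Node/NamespaceNode/SubprogramNode types and a toy dyn_cast written for illustration only; it is not the LLVM API itself.

#include <cstdio>
#include <string>

// Hypothetical, minimal stand-ins for the debug-info metadata hierarchy;
// the real code dispatches with llvm::dyn_cast<> over MDNamespace,
// MDSubprogram, MDType and MDGlobalVariable.
struct Node {
  enum Kind { NamespaceKind, SubprogramKind } K;
  std::string Name;
  Node(Kind K, std::string Name) : K(K), Name(std::move(Name)) {}
  virtual ~Node() = default;
};
struct NamespaceNode : Node {
  explicit NamespaceNode(std::string N) : Node(NamespaceKind, std::move(N)) {}
  static bool classof(const Node *N) { return N->K == NamespaceKind; }
};
struct SubprogramNode : Node {
  explicit SubprogramNode(std::string N) : Node(SubprogramKind, std::move(N)) {}
  static bool classof(const Node *N) { return N->K == SubprogramKind; }
};

// A tiny dyn_cast lookalike; just enough to show the dispatch pattern.
template <typename To, typename From> To *dyn_cast(From *N) {
  return To::classof(N) ? static_cast<To *>(N) : nullptr;
}

// Mirrors the rewritten if/else-if chain: classify by concrete class
// instead of asking a wrapper isNameSpace()/isSubprogram() and re-wrapping.
std::string describe(Node *Entity) {
  if (auto *NS = dyn_cast<NamespaceNode>(Entity))
    return "namespace " + NS->Name;
  if (auto *SP = dyn_cast<SubprogramNode>(Entity))
    return "subprogram " + SP->Name;
  return "other entity " + Entity->Name;
}

int main() {
  SubprogramNode SP("foo");
  NamespaceNode NS("bar");
  std::printf("%s\n%s\n", describe(&SP).c_str(), describe(&NS).c_str());
}
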
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index e9ebd97..fb8fc6e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -129,20 +129,22 @@ bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) {
/// resolve - Look in the DwarfDebug map for the MDNode that
/// corresponds to the reference.
-template <typename T> T DbgVariable::resolve(DIRef<T> Ref) const {
+template <typename T> T *DbgVariable::resolve(TypedDebugNodeRef<T> Ref) const {
return DD->resolve(Ref);
}
bool DbgVariable::isBlockByrefVariable() const {
- assert(Var.isVariable() && "Invalid complex DbgVariable!");
- return Var.isBlockByrefVariable(DD->getTypeIdentifierMap());
+ assert(Var && "Invalid complex DbgVariable!");
+ return Var->getType()
+ .resolve(DD->getTypeIdentifierMap())
+ ->isBlockByrefStruct();
}
DIType DbgVariable::getType() const {
- DIType Ty = Var.getType().resolve(DD->getTypeIdentifierMap());
+ MDType *Ty = Var->getType().resolve(DD->getTypeIdentifierMap());
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
- if (Var.isBlockByrefVariable(DD->getTypeIdentifierMap())) {
+ if (Ty->isBlockByrefStruct()) {
/* Byref variables, in Blocks, are declared by the programmer as
"SomeType VarName;", but the compiler creates a
__Block_byref_x_VarName struct, and gives the variable VarName
@@ -167,17 +169,17 @@ DIType DbgVariable::getType() const {
have a DW_AT_location that tells the debugger how to unwind through
the pointers and __Block_byref_x_VarName struct to find the actual
value of the variable. The function addBlockByrefType does this. */
- DIType subType = Ty;
- uint16_t tag = Ty.getTag();
+ MDType *subType = Ty;
+ uint16_t tag = Ty->getTag();
if (tag == dwarf::DW_TAG_pointer_type)
- subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom());
+ subType = resolve(DITypeRef(cast<MDDerivedType>(Ty)->getBaseType()));
- DIArray Elements = DICompositeType(subType).getElements();
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDerivedType DT(Elements.getElement(i));
- if (getName() == DT.getName())
- return (resolve(DT.getTypeDerivedFrom()));
+ auto Elements = cast<MDCompositeTypeBase>(subType)->getElements();
+ for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ auto *DT = cast<MDDerivedTypeBase>(Elements[i]);
+ if (getName() == DT->getName())
+ return resolve(DITypeRef(DT->getBaseType()));
}
}
return Ty;
@@ -275,25 +277,25 @@ static StringRef getObjCMethodName(StringRef In) {
// that do not have a DW_AT_name or DW_AT_linkage_name field - this
// is only slightly different than the lookup of non-standard ObjC names.
void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) {
- if (!SP.isDefinition())
+ if (!SP->isDefinition())
return;
- addAccelName(SP.getName(), Die);
+ addAccelName(SP->getName(), Die);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
- if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName())
- addAccelName(SP.getLinkageName(), Die);
+ if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName())
+ addAccelName(SP->getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
// too.
- if (isObjCClass(SP.getName())) {
+ if (isObjCClass(SP->getName())) {
StringRef Class, Category;
- getObjCClassCategory(SP.getName(), Class, Category);
+ getObjCClassCategory(SP->getName(), Class, Category);
addAccelObjC(Class, Die);
if (Category != "")
addAccelObjC(Category, Die);
// Also add the base method name to the name table.
- addAccelName(getObjCMethodName(SP.getName()), Die);
+ addAccelName(getObjCMethodName(SP->getName()), Die);
}
}
@@ -302,11 +304,10 @@ void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) {
bool DwarfDebug::isSubprogramContext(const MDNode *Context) {
if (!Context)
return false;
- DIDescriptor D(Context);
- if (D.isSubprogram())
+ if (isa<MDSubprogram>(Context))
return true;
- if (D.isType())
- return isSubprogramContext(resolve(DIType(Context).getContext()));
+ if (auto *T = dyn_cast<MDType>(Context))
+ return isSubprogramContext(resolve(T->getScope()));
return false;
}
@@ -362,8 +363,8 @@ void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
// Create new DwarfCompileUnit for the given metadata node with tag
// DW_TAG_compile_unit.
DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
- StringRef FN = DIUnit.getFilename();
- CompilationDir = DIUnit.getDirectory();
+ StringRef FN = DIUnit->getFilename();
+ CompilationDir = DIUnit->getDirectory();
auto OwnedUnit = make_unique<DwarfCompileUnit>(
InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
@@ -381,9 +382,9 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
Asm->OutStreamer.getContext().setMCLineTableCompilationDir(
NewCU.getUniqueID(), CompilationDir);
- NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
+ NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit->getProducer());
NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
- DIUnit.getLanguage());
+ DIUnit->getSourceLanguage());
NewCU.addString(Die, dwarf::DW_AT_name, FN);
if (!useSplitDwarf()) {
@@ -397,14 +398,14 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
addGnuPubAttributes(NewCU, Die);
}
- if (DIUnit.isOptimized())
+ if (DIUnit->isOptimized())
NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized);
- StringRef Flags = DIUnit.getFlags();
+ StringRef Flags = DIUnit->getFlags();
if (!Flags.empty())
NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
- if (unsigned RVer = DIUnit.getRunTimeVersion())
+ if (unsigned RVer = DIUnit->getRuntimeVersion())
NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
@@ -420,9 +421,8 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
const MDNode *N) {
- DIImportedEntity Module(N);
- assert(Module.Verify());
- if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext()))
+ DIImportedEntity Module = cast<MDImportedEntity>(N);
+ if (DIE *D = TheCU.getOrCreateContextDIE(Module->getScope()))
D->addChild(TheCU.constructImportedEntityDIE(Module));
}
@@ -445,44 +445,35 @@ void DwarfDebug::beginModule() {
SingleCU = CU_Nodes->getNumOperands() == 1;
for (MDNode *N : CU_Nodes->operands()) {
- DICompileUnit CUNode(N);
+ DICompileUnit CUNode = cast<MDCompileUnit>(N);
DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
- DIArray ImportedEntities = CUNode.getImportedEntities();
- for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i)
- ScopesWithImportedEntities.push_back(std::make_pair(
- DIImportedEntity(ImportedEntities.getElement(i)).getContext(),
- ImportedEntities.getElement(i)));
+ for (auto *IE : CUNode->getImportedEntities())
+ ScopesWithImportedEntities.push_back(std::make_pair(IE->getScope(), IE));
// Stable sort to preserve the order of appearance of imported entities.
// This is to avoid out-of-order processing of interdependent declarations
// within the same scope, e.g. { namespace A = base; namespace B = A; }
std::stable_sort(ScopesWithImportedEntities.begin(),
ScopesWithImportedEntities.end(), less_first());
- DIArray GVs = CUNode.getGlobalVariables();
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
- CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
- DIArray SPs = CUNode.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
- SPMap.insert(std::make_pair(SPs.getElement(i), &CU));
- DIArray EnumTypes = CUNode.getEnumTypes();
- for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) {
- DIType Ty(EnumTypes.getElement(i));
+ for (auto *GV : CUNode->getGlobalVariables())
+ CU.getOrCreateGlobalVariableDIE(GV);
+ for (auto *SP : CUNode->getSubprograms())
+ SPMap.insert(std::make_pair(SP, &CU));
+ for (DIType Ty : CUNode->getEnumTypes()) {
// The enum types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
- DIType UniqueTy(resolve(Ty.getRef()));
+ DIType UniqueTy = cast<MDType>(resolve(Ty->getRef()));
CU.getOrCreateTypeDIE(UniqueTy);
}
- DIArray RetainedTypes = CUNode.getRetainedTypes();
- for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) {
- DIType Ty(RetainedTypes.getElement(i));
+ for (DIType Ty : CUNode->getRetainedTypes()) {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
- DIType UniqueTy(resolve(Ty.getRef()));
+ DIType UniqueTy = cast<MDType>(resolve(Ty->getRef()));
CU.getOrCreateTypeDIE(UniqueTy);
}
// Emit imported_modules last so that the relevant context is already
// available.
- for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i)
- constructAndAddImportedEntityDIE(CU, ImportedEntities.getElement(i));
+ for (auto *IE : CUNode->getImportedEntities())
+ constructAndAddImportedEntityDIE(CU, IE);
}
// Tell MMI that we have debug info.
@@ -498,7 +489,8 @@ void DwarfDebug::finishVariableDefinitions() {
// DIE::getUnit isn't simple - it walks parent pointers, etc.
DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit());
assert(Unit);
- DbgVariable *AbsVar = getExistingAbstractVariable(Var->getVariable());
+ DbgVariable *AbsVar = getExistingAbstractVariable(
+ InlinedVariable(Var->getVariable(), Var->getInlinedAt()));
if (AbsVar && AbsVar->getDIE()) {
Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
*AbsVar->getDIE());
@@ -510,7 +502,7 @@ void DwarfDebug::finishVariableDefinitions() {
void DwarfDebug::finishSubprogramDefinitions() {
for (const auto &P : SPMap)
forBothCUs(*P.second, [&](DwarfCompileUnit &CU) {
- CU.finishSubprogramDefinition(DISubprogram(P.first));
+ CU.finishSubprogramDefinition(cast<MDSubprogram>(P.first));
});
}
@@ -521,14 +513,12 @@ void DwarfDebug::collectDeadVariables() {
if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
for (MDNode *N : CU_Nodes->operands()) {
- DICompileUnit TheCU(N);
+ DICompileUnit TheCU = cast<MDCompileUnit>(N);
// Construct subprogram DIE and add variables DIEs.
DwarfCompileUnit *SPCU =
static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU));
assert(SPCU && "Unable to find Compile Unit!");
- DIArray Subprograms = TheCU.getSubprograms();
- for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
- DISubprogram SP(Subprograms.getElement(i));
+ for (auto *SP : TheCU->getSubprograms()) {
if (ProcessedSPNodes.count(SP) != 0)
continue;
SPCU->collectDeadVariables(SP);
@@ -671,70 +661,71 @@ void DwarfDebug::endModule() {
}
// Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV,
+DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV,
DIVariable &Cleansed) {
- LLVMContext &Ctx = DV->getContext();
 // More than one inlined variable corresponds to one abstract variable.
- // FIXME: This duplication of variables when inlining should probably be
- // removed. It's done to allow each DIVariable to describe its location
- // because the DebugLoc on the dbg.value/declare isn't accurate. We should
- // make it accurate then remove this duplication/cleansing stuff.
- Cleansed = cleanseInlinedVariable(DV, Ctx);
+ Cleansed = IV.first;
auto I = AbstractVariables.find(Cleansed);
if (I != AbstractVariables.end())
return I->second.get();
return nullptr;
}
-DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) {
+DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) {
DIVariable Cleansed;
- return getExistingAbstractVariable(DV, Cleansed);
+ return getExistingAbstractVariable(IV, Cleansed);
}
void DwarfDebug::createAbstractVariable(const DIVariable &Var,
LexicalScope *Scope) {
- auto AbsDbgVariable = make_unique<DbgVariable>(Var, DIExpression(), this);
+ auto AbsDbgVariable =
+ make_unique<DbgVariable>(Var, nullptr, DIExpression(), this);
InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
AbstractVariables[Var] = std::move(AbsDbgVariable);
}
-void DwarfDebug::ensureAbstractVariableIsCreated(const DIVariable &DV,
+void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV,
const MDNode *ScopeNode) {
- DIVariable Cleansed = DV;
- if (getExistingAbstractVariable(DV, Cleansed))
+ DIVariable Cleansed;
+ if (getExistingAbstractVariable(IV, Cleansed))
return;
- createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(ScopeNode));
+ createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(
+ cast<MDLocalScope>(ScopeNode)));
}
-void
-DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV,
- const MDNode *ScopeNode) {
- DIVariable Cleansed = DV;
- if (getExistingAbstractVariable(DV, Cleansed))
+void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(
+ InlinedVariable IV, const MDNode *ScopeNode) {
+ DIVariable Cleansed;
+ if (getExistingAbstractVariable(IV, Cleansed))
return;
- if (LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode))
+ if (LexicalScope *Scope =
+ LScopes.findAbstractScope(cast_or_null<MDLocalScope>(ScopeNode)))
createAbstractVariable(Cleansed, Scope);
}
// Collect variable information from side table maintained by MMI.
void DwarfDebug::collectVariableInfoFromMMITable(
- SmallPtrSetImpl<const MDNode *> &Processed) {
+ DenseSet<InlinedVariable> &Processed) {
for (const auto &VI : MMI->getVariableDbgInfo()) {
if (!VI.Var)
continue;
- Processed.insert(VI.Var);
+ assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
+ "Expected inlined-at fields to agree");
+
+ InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt());
+ Processed.insert(Var);
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
- DIVariable DV(VI.Var);
- DIExpression Expr(VI.Expr);
- ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
- auto RegVar = make_unique<DbgVariable>(DV, Expr, this, VI.Slot);
+ DIExpression Expr = cast_or_null<MDExpression>(VI.Expr);
+ ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode());
+ auto RegVar =
+ make_unique<DbgVariable>(Var.first, Var.second, Expr, this, VI.Slot);
if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
ConcreteVariables.push_back(std::move(RegVar));
}
@@ -768,12 +759,12 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
/// Determine whether two variable pieces overlap.
static bool piecesOverlap(DIExpression P1, DIExpression P2) {
- if (!P1.isBitPiece() || !P2.isBitPiece())
+ if (!P1->isBitPiece() || !P2->isBitPiece())
return true;
- unsigned l1 = P1.getBitPieceOffset();
- unsigned l2 = P2.getBitPieceOffset();
- unsigned r1 = l1 + P1.getBitPieceSize();
- unsigned r2 = l2 + P2.getBitPieceSize();
+ unsigned l1 = P1->getBitPieceOffset();
+ unsigned l2 = P2->getBitPieceOffset();
+ unsigned r1 = l1 + P1->getBitPieceSize();
+ unsigned r2 = l2 + P2->getBitPieceSize();
 // True where [l1,r1[ and [l2,r2[ overlap.
return (l1 < r2) && (l2 < r1);
}
@@ -845,7 +836,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
bool couldMerge = false;
// If this is a piece, it may belong to the current DebugLocEntry.
- if (DIExpr.isBitPiece()) {
+ if (DIExpr->isBitPiece()) {
// Add this value to the list of open ranges.
OpenRanges.push_back(Value);
@@ -884,35 +875,34 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
// Find variables for each lexical scope.
-void
-DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
- SmallPtrSetImpl<const MDNode *> &Processed) {
+void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
+ DenseSet<InlinedVariable> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMMITable(Processed);
for (const auto &I : DbgValues) {
- DIVariable DV(I.first);
- if (Processed.count(DV))
+ InlinedVariable IV = I.first;
+ if (Processed.count(IV))
continue;
- // Instruction ranges, specifying where DV is accessible.
+ // Instruction ranges, specifying where IV is accessible.
const auto &Ranges = I.second;
if (Ranges.empty())
continue;
LexicalScope *Scope = nullptr;
- if (MDNode *IA = DV.getInlinedAt())
- Scope = LScopes.findInlinedScope(DV.getContext(), IA);
+ if (const MDLocation *IA = IV.second)
+ Scope = LScopes.findInlinedScope(IV.first->getScope(), IA);
else
- Scope = LScopes.findLexicalScope(DV.getContext());
+ Scope = LScopes.findLexicalScope(IV.first->getScope());
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
- Processed.insert(DV);
+ Processed.insert(IV);
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
- ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+ ensureAbstractVariableIsCreatedIfScoped(IV, Scope->getScopeNode());
ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this));
DbgVariable *RegVar = ConcreteVariables.back().get();
InfoHolder.addScopeVariable(Scope, RegVar);
@@ -937,16 +927,15 @@ DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
}
// Collect info for variables that were optimized out.
- DIArray Variables = SP.getVariables();
- for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
- DIVariable DV(Variables.getElement(i));
- assert(DV.isVariable());
- if (!Processed.insert(DV).second)
+ for (DIVariable DV : SP->getVariables()) {
+ if (!Processed.insert(InlinedVariable(DV, nullptr)).second)
continue;
- if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) {
- ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+ if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) {
+ ensureAbstractVariableIsCreatedIfScoped(InlinedVariable(DV, nullptr),
+ Scope->getScopeNode());
DIExpression NoExpr;
- ConcreteVariables.push_back(make_unique<DbgVariable>(DV, NoExpr, this));
+ ConcreteVariables.push_back(
+ make_unique<DbgVariable>(DV, nullptr, NoExpr, this));
InfoHolder.addScopeVariable(Scope, ConcreteVariables.back().get());
}
}
@@ -972,7 +961,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
if (!MI->isDebugValue()) {
DebugLoc DL = MI->getDebugLoc();
if (DL != PrevInstLoc) {
- if (!DL.isUnknown()) {
+ if (DL) {
unsigned Flags = 0;
PrevInstLoc = DL;
if (DL == PrologEndLoc) {
@@ -984,7 +973,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine())
Flags |= DWARF2_FLAG_IS_STMT;
- const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+ const MDNode *Scope = DL.getScope();
recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
} else if (UnknownLocations) {
PrevInstLoc = DL;
@@ -1072,7 +1061,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
for (const auto &MBB : *MF)
for (const auto &MI : MBB)
if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
- !MI.getDebugLoc().isUnknown()) {
+ MI.getDebugLoc()) {
// Did the target forget to set the FrameSetup flag for CFI insns?
assert(!MI.isCFIInstruction() &&
"First non-frame-setup instruction is a CFI instruction.");
@@ -1137,11 +1126,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// The first mention of a function argument gets the CurrentFnBegin
// label, so arguments are visible when breaking at function entry.
- DIVariable DIVar(Ranges.front().first->getDebugVariable());
- if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable &&
- getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) {
+ DIVariable DIVar = Ranges.front().first->getDebugVariable();
+ if (DIVar->getTag() == dwarf::DW_TAG_arg_variable &&
+ getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
- if (Ranges.front().first->getDebugExpression().isBitPiece()) {
+ if (Ranges.front().first->getDebugExpression()->isBitPiece()) {
// Mark all non-overlapping initial pieces.
for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
DIExpression Piece = I->first->getDebugExpression();
@@ -1168,15 +1157,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// Record beginning of function.
PrologEndLoc = findPrologueEndLoc(MF);
- if (!PrologEndLoc.isUnknown()) {
- DebugLoc FnStartDL =
- PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext());
-
+ if (MDLocation *L = PrologEndLoc) {
// We'd like to list the prologue as "not statements" but GDB behaves
// poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
- recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
- FnStartDL.getScope(MF->getFunction()->getContext()),
- DWARF2_FLAG_IS_STMT);
+ auto *SP = L->getInlinedAtScope()->getSubprogram();
+ recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT);
}
}
@@ -1199,10 +1184,10 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- DISubprogram SP(FnScope->getScopeNode());
+ DISubprogram SP = cast<MDSubprogram>(FnScope->getScopeNode());
DwarfCompileUnit &TheCU = *SPMap.lookup(SP);
- SmallPtrSet<const MDNode *, 16> ProcessedVars;
+ DenseSet<InlinedVariable> ProcessedVars;
collectVariableInfo(TheCU, SP, ProcessedVars);
// Add the range of this function to the list of ranges for the CU.
@@ -1210,7 +1195,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Under -gmlt, skip building the subprogram if there are no inlined
// subroutines inside it.
- if (TheCU.getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly &&
+ if (TheCU.getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly &&
LScopes.getAbstractScopesList().empty() && !IsDarwin) {
assert(InfoHolder.getScopeVariables().empty());
assert(DbgValues.empty());
@@ -1229,16 +1214,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
#endif
// Construct abstract scopes.
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
- DISubprogram SP(AScope->getScopeNode());
- assert(SP.isSubprogram());
+ DISubprogram SP = cast<MDSubprogram>(AScope->getScopeNode());
// Collect info for variables that were optimized out.
- DIArray Variables = SP.getVariables();
- for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
- DIVariable DV(Variables.getElement(i));
- assert(DV && DV.isVariable());
- if (!ProcessedVars.insert(DV).second)
+ for (DIVariable DV : SP->getVariables()) {
+ if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second)
continue;
- ensureAbstractVariableIsCreated(DV, DV.getContext());
+ ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr),
+ DV->getScope());
assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
&& "ensureAbstractVariableIsCreated inserted abstract scopes");
}
@@ -1270,12 +1252,11 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
StringRef Dir;
unsigned Src = 1;
unsigned Discriminator = 0;
- if (DIScope Scope = DIScope(S)) {
- assert(Scope.isScope());
- Fn = Scope.getFilename();
- Dir = Scope.getDirectory();
- if (Scope.isLexicalBlockFile())
- Discriminator = DILexicalBlockFile(S).getDiscriminator();
+ if (auto *Scope = cast_or_null<MDScope>(S)) {
+ Fn = Scope->getFilename();
+ Dir = Scope->getDirectory();
+ if (auto *LBF = dyn_cast<MDLexicalBlockFile>(Scope))
+ Discriminator = LBF->getDiscriminator();
unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID();
Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
@@ -1499,23 +1480,25 @@ static void emitDebugLocValue(const AsmPrinter &AP,
Streamer);
// Regular entry.
if (Value.isInt()) {
- DIBasicType BTy(DV.getType().resolve(TypeIdentifierMap));
- if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed ||
- BTy.getEncoding() == dwarf::DW_ATE_signed_char))
+ MDType *T = DV->getType().resolve(TypeIdentifierMap);
+ auto *B = dyn_cast<MDBasicType>(T);
+ if (B && (B->getEncoding() == dwarf::DW_ATE_signed ||
+ B->getEncoding() == dwarf::DW_ATE_signed_char))
DwarfExpr.AddSignedConstant(Value.getInt());
else
DwarfExpr.AddUnsignedConstant(Value.getInt());
} else if (Value.isLocation()) {
MachineLocation Loc = Value.getLoc();
DIExpression Expr = Value.getExpression();
- if (!Expr || (Expr.getNumElements() == 0))
+ if (!Expr || !Expr->getNumElements())
// Regular entry.
AP.EmitDwarfRegOp(Streamer, Loc);
else {
// Complex address entry.
if (Loc.getOffset()) {
DwarfExpr.AddMachineRegIndirect(Loc.getReg(), Loc.getOffset());
- DwarfExpr.AddExpression(Expr.begin(), Expr.end(), PieceOffsetInBits);
+ DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(),
+ PieceOffsetInBits);
} else
DwarfExpr.AddMachineRegExpression(Expr, Loc.getReg(),
PieceOffsetInBits);
@@ -1542,8 +1525,8 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
unsigned Offset = 0;
for (auto Piece : Values) {
DIExpression Expr = Piece.getExpression();
- unsigned PieceOffset = Expr.getBitPieceOffset();
- unsigned PieceSize = Expr.getBitPieceSize();
+ unsigned PieceOffset = Expr->getBitPieceOffset();
+ unsigned PieceSize = Expr->getBitPieceSize();
assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
if (Offset < PieceOffset) {
// The DWARF spec seriously mandates pieces with no locations for gaps.
@@ -1554,15 +1537,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
Offset += PieceOffset-Offset;
}
Offset += PieceSize;
-
-#ifndef NDEBUG
- DIVariable Var = Piece.getVariable();
- unsigned VarSize = Var.getSizeInBits(TypeIdentifierMap);
- assert(PieceSize+PieceOffset <= VarSize
- && "piece is larger than or outside of variable");
- assert(PieceSize != VarSize
- && "piece covers entire variable");
-#endif
+
emitDebugLocValue(AP, TypeIdentifierMap, Streamer, Piece, PieceOffset);
}
} else {
@@ -1850,7 +1825,7 @@ void DwarfDebug::emitDebugRanges() {
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfUnit> NewU) {
NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name,
- U.getCUNode().getSplitDebugFilename());
+ U.getCUNode()->getSplitDebugFilename());
if (!CompilationDir.empty())
NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
@@ -1914,7 +1889,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
if (!useSplitDwarf())
return nullptr;
if (SingleCU)
- SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode().getDirectory());
+ SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory());
return &SplitTypeUnitFileTable;
}
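
Many of the DwarfDebug.cpp hunks above replace a bare variable node as the bookkeeping key with a (variable, inlined-at) pair, so each inlined instance of the same source variable is tracked separately, while getExistingAbstractVariable keys the abstract variable on the variable alone (IV.first). A minimal sketch of that distinction, using hypothetical Variable/Location placeholders and std::set instead of DenseSet:

#include <cstdio>
#include <set>
#include <utility>

// Hypothetical placeholders for a variable descriptor and an inlined-at
// location; the real code uses DIVariable and MDLocation pointers.
struct Variable { const char *Name; };
struct Location { unsigned Line; };

// One entry per concrete (variable, inlined-at) instance, mirroring
// DbgValueHistoryMap::InlinedVariable.
using InlinedVariable = std::pair<const Variable *, const Location *>;

int main() {
  Variable X{"x"};
  Location CallA{10}, CallB{20};

  std::set<InlinedVariable> Processed;
  // The same source variable inlined at two call sites yields two
  // distinct concrete instances...
  Processed.insert({&X, &CallA});
  Processed.insert({&X, &CallB});
  // ...while the non-inlined instance uses a null inlined-at location.
  Processed.insert({&X, nullptr});

  // Abstract variables, by contrast, are keyed on the variable alone
  // (IV.first above), so all three concrete instances map back to one
  // abstract entry.
  std::set<const Variable *> Abstract;
  for (const InlinedVariable &IV : Processed)
    Abstract.insert(IV.first);

  std::printf("concrete: %zu, abstract: %zu\n", Processed.size(),
              Abstract.size());
}
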
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 74db3ef..e067f41 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -21,6 +21,7 @@
#include "DwarfAccelTable.h"
#include "DwarfFile.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -74,7 +75,8 @@ public:
/// - Variables that are described by multiple MMI table entries have multiple
/// expressions and frame indices.
class DbgVariable {
- DIVariable Var; /// Variable Descriptor.
+ DIVariable Var; /// Variable Descriptor.
+ DILocation IA; /// Inlined at location.
SmallVector<DIExpression, 1> Expr; /// Complex address location expression.
DIE *TheDIE; /// Variable DIE.
unsigned DotDebugLocOffset; /// Offset in DotDebugLocEntries.
@@ -84,11 +86,11 @@ class DbgVariable {
public:
/// Construct a DbgVariable from a DIVariable.
- DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD, int FI = ~0)
- : Var(V), Expr(1, E), TheDIE(nullptr), DotDebugLocOffset(~0U),
- MInsn(nullptr), DD(DD) {
+ DbgVariable(DIVariable V, DILocation IA, DIExpression E, DwarfDebug *DD,
+ int FI = ~0)
+ : Var(V), IA(IA), Expr(1, E), TheDIE(nullptr), DotDebugLocOffset(~0U),
+ MInsn(nullptr), DD(DD) {
FrameIndex.push_back(FI);
- assert(Var.Verify());
assert(!E || E->isValid());
}
@@ -96,6 +98,7 @@ public:
/// AbstractVar may be NULL.
DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD)
: Var(DbgValue->getDebugVariable()),
+ IA(DbgValue->getDebugLoc()->getInlinedAt()),
Expr(1, DbgValue->getDebugExpression()), TheDIE(nullptr),
DotDebugLocOffset(~0U), MInsn(DbgValue), DD(DD) {
FrameIndex.push_back(~0);
@@ -103,12 +106,13 @@ public:
// Accessors.
DIVariable getVariable() const { return Var; }
+ DILocation getInlinedAt() const { return IA; }
const ArrayRef<DIExpression> getExpression() const { return Expr; }
void setDIE(DIE &D) { TheDIE = &D; }
DIE *getDIE() const { return TheDIE; }
void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
- StringRef getName() const { return Var.getName(); }
+ StringRef getName() const { return Var->getName(); }
const MachineInstr *getMInsn() const { return MInsn; }
const ArrayRef<int> getFrameIndex() const { return FrameIndex; }
@@ -116,6 +120,7 @@ public:
assert( DotDebugLocOffset == ~0U && !MInsn && "not an MMI entry");
assert(V.DotDebugLocOffset == ~0U && !V.MInsn && "not an MMI entry");
assert(V.Var == Var && "conflicting DIVariable");
+ assert(V.IA == IA && "conflicting inlined-at location");
if (V.getFrameIndex().back() != ~0) {
auto E = V.getExpression();
@@ -124,40 +129,40 @@ public:
FrameIndex.append(FI.begin(), FI.end());
}
assert(Expr.size() > 1
- ? std::all_of(Expr.begin(), Expr.end(),
- [](DIExpression &E) { return E.isBitPiece(); })
- : (true && "conflicting locations for variable"));
+ ? std::all_of(Expr.begin(), Expr.end(),
+ [](DIExpression &E) { return E->isBitPiece(); })
+ : (true && "conflicting locations for variable"));
}
// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
- if (Var.getTag() == dwarf::DW_TAG_arg_variable)
+ if (Var->getTag() == dwarf::DW_TAG_arg_variable)
return dwarf::DW_TAG_formal_parameter;
return dwarf::DW_TAG_variable;
}
/// \brief Return true if DbgVariable is artificial.
bool isArtificial() const {
- if (Var.isArtificial())
+ if (Var->isArtificial())
return true;
- if (getType().isArtificial())
+ if (getType()->isArtificial())
return true;
return false;
}
bool isObjectPointer() const {
- if (Var.isObjectPointer())
+ if (Var->isObjectPointer())
return true;
- if (getType().isObjectPointer())
+ if (getType()->isObjectPointer())
return true;
return false;
}
bool variableHasComplexAddress() const {
- assert(Var.isVariable() && "Invalid complex DbgVariable!");
+ assert(Var && "Invalid complex DbgVariable!");
assert(Expr.size() == 1 &&
"variableHasComplexAddress() invoked on multi-FI variable");
- return Expr.back().getNumElements() > 0;
+ return Expr.back()->getNumElements() > 0;
}
bool isBlockByrefVariable() const;
DIType getType() const;
@@ -165,7 +170,7 @@ public:
private:
/// resolve - Look in the DwarfDebug map for the MDNode that
/// corresponds to the reference.
- template <typename T> T resolve(DIRef<T> Ref) const;
+ template <typename T> T *resolve(TypedDebugNodeRef<T> Ref) const;
};
@@ -324,14 +329,16 @@ class DwarfDebug : public AsmPrinterHandler {
return InfoHolder.getUnits();
}
+ typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+
/// \brief Find abstract variable associated with Var.
- DbgVariable *getExistingAbstractVariable(const DIVariable &DV,
+ DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
DIVariable &Cleansed);
- DbgVariable *getExistingAbstractVariable(const DIVariable &DV);
+ DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
void createAbstractVariable(const DIVariable &DV, LexicalScope *Scope);
- void ensureAbstractVariableIsCreated(const DIVariable &Var,
+ void ensureAbstractVariableIsCreated(InlinedVariable Var,
const MDNode *Scope);
- void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var,
+ void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var,
const MDNode *Scope);
/// \brief Construct a DIE for this abstract scope.
@@ -461,7 +468,7 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Populate LexicalScope entries with variables' info.
void collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
- SmallPtrSetImpl<const MDNode *> &ProcessedVars);
+ DenseSet<InlinedVariable> &ProcessedVars);
/// \brief Build the location list for all DBG_VALUEs in the
/// function that describe the same variable.
@@ -470,7 +477,7 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Collect variable information from the side table maintained
/// by MMI.
- void collectVariableInfoFromMMITable(SmallPtrSetImpl<const MDNode *> &P);
+ void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P);
/// \brief Ensure that a label will be emitted before MI.
void requestLabelBeforeInsn(const MachineInstr *MI) {
@@ -567,7 +574,7 @@ public:
void emitDebugLocEntryLocation(const DebugLocEntry &Entry);
/// Find the MDNode for the given reference.
- template <typename T> T resolve(DIRef<T> Ref) const {
+ template <typename T> T *resolve(TypedDebugNodeRef<T> Ref) const {
return Ref.resolve(TypeIdentifierMap);
}
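
The resolve() helpers in DwarfDebug.h now return a pointer instead of a by-value descriptor, but the mechanism is unchanged: a typed reference is either a direct node pointer or an identifier looked up in the type-identifier map. A rough, self-contained sketch of that two-way reference, with hypothetical TypedRef/TypeNode names standing in for TypedDebugNodeRef and the MD* classes:

#include <cassert>
#include <cstdio>
#include <map>
#include <string>

// Hypothetical stand-in for an ODR-uniqued type node.
struct TypeNode { std::string Name; };

using TypeIdentifierMap = std::map<std::string, TypeNode *>;

// A reference that is either a direct pointer or a string identifier that
// must be resolved through the map (roughly the DITypeRef idea).
template <typename T> class TypedRef {
  T *Direct = nullptr;
  std::string Identifier;

public:
  explicit TypedRef(T *N) : Direct(N) {}
  explicit TypedRef(std::string Id) : Identifier(std::move(Id)) {}

  T *resolve(const TypeIdentifierMap &Map) const {
    if (Direct)
      return Direct;
    auto I = Map.find(Identifier);
    return I == Map.end() ? nullptr : I->second;
  }
};

int main() {
  TypeNode Foo{"Foo"};
  TypeIdentifierMap Map{{"_ZTS3Foo", &Foo}};

  TypedRef<TypeNode> ByPointer(&Foo);
  TypedRef<TypeNode> ByName(std::string("_ZTS3Foo"));

  assert(ByPointer.resolve(Map) == &Foo);
  assert(ByName.resolve(Map) == &Foo);
  std::printf("resolved: %s\n", ByName.resolve(Map)->Name.c_str());
}
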
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index 6eaf707..a4fd36f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -48,7 +48,7 @@ public:
// Main entry points.
//
DwarfCFIException(AsmPrinter *A);
- virtual ~DwarfCFIException();
+ ~DwarfCFIException() override;
/// Emit all exception information that should come after the content.
void endModule() override;
@@ -70,7 +70,7 @@ public:
// Main entry points.
//
ARMException(AsmPrinter *A);
- virtual ~ARMException();
+ ~ARMException() override;
/// Emit all exception information that should come after the content.
void endModule() override;
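
The DwarfException.h hunks replace "virtual ~X();" with "~X() override;". A minimal sketch (hypothetical Handler/CFIHandler names) of why marking an overriding destructor with override is preferable: the compiler rejects the declaration outright if the base destructor is not virtual, instead of allowing a silently non-polymorphic delete.

#include <memory>

// Hypothetical stand-ins for an AsmPrinter handler hierarchy.
struct Handler {
  virtual void endModule() = 0;
  virtual ~Handler() = default;   // polymorphic deletion is allowed
};

struct CFIHandler : Handler {
  void endModule() override {}
  // 'override' makes the compiler verify that ~Handler() really is
  // virtual; if the base destructor were non-virtual this would fail
  // to compile rather than cause undefined behaviour on delete.
  ~CFIHandler() override = default;
};

int main() {
  std::unique_ptr<Handler> H(new CFIHandler());
  H->endModule();
  return 0;
}
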
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 489e455..e576c93 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -195,27 +195,27 @@ static unsigned getOffsetOrZero(unsigned OffsetInBits,
bool DwarfExpression::AddMachineRegExpression(DIExpression Expr,
unsigned MachineReg,
unsigned PieceOffsetInBits) {
- auto I = Expr.begin();
- auto E = Expr.end();
+ auto I = Expr->expr_op_begin();
+ auto E = Expr->expr_op_end();
if (I == E)
return AddMachineRegPiece(MachineReg);
// Pattern-match combinations for which more efficient representations exist
// first.
bool ValidReg = false;
- switch (*I) {
+ switch (I->getOp()) {
case dwarf::DW_OP_bit_piece: {
- unsigned OffsetInBits = I->getArg(1);
- unsigned SizeInBits = I->getArg(2);
+ unsigned OffsetInBits = I->getArg(0);
+ unsigned SizeInBits = I->getArg(1);
// Piece always comes at the end of the expression.
return AddMachineRegPiece(MachineReg, SizeInBits,
getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
}
case dwarf::DW_OP_plus: {
// [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
- auto N = I->getNext();
- if ((N != E) && (*N == dwarf::DW_OP_deref)) {
- unsigned Offset = I->getArg(1);
+ auto N = I.getNext();
+ if (N != E && N->getOp() == dwarf::DW_OP_deref) {
+ unsigned Offset = I->getArg(0);
ValidReg = AddMachineRegIndirect(MachineReg, Offset);
std::advance(I, 2);
break;
@@ -240,20 +240,20 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr,
return true;
}
-void DwarfExpression::AddExpression(DIExpression::iterator I,
- DIExpression::iterator E,
+void DwarfExpression::AddExpression(MDExpression::expr_op_iterator I,
+ MDExpression::expr_op_iterator E,
unsigned PieceOffsetInBits) {
for (; I != E; ++I) {
- switch (*I) {
+ switch (I->getOp()) {
case dwarf::DW_OP_bit_piece: {
- unsigned OffsetInBits = I->getArg(1);
- unsigned SizeInBits = I->getArg(2);
+ unsigned OffsetInBits = I->getArg(0);
+ unsigned SizeInBits = I->getArg(1);
AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
break;
}
case dwarf::DW_OP_plus:
EmitOp(dwarf::DW_OP_plus_uconst);
- EmitUnsigned(I->getArg(1));
+ EmitUnsigned(I->getArg(0));
break;
case dwarf::DW_OP_deref:
EmitOp(dwarf::DW_OP_deref);
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 985d52c..a8b65f5 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -97,7 +97,8 @@ public:
 /// Emit the operations remaining from the DIExpression iterator I.
/// \param PieceOffsetInBits If this is one piece out of a fragmented
/// location, this is the offset of the piece inside the entire variable.
- void AddExpression(DIExpression::iterator I, DIExpression::iterator E,
+ void AddExpression(MDExpression::expr_op_iterator I,
+ MDExpression::expr_op_iterator E,
unsigned PieceOffsetInBits = 0);
};
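
The DwarfExpression changes iterate expressions through an operand-bundled cursor: each step exposes the opcode plus its arguments, which is why the getArg() indices above shift from 1-based to 0-based. The sketch below imitates that iteration style over a flat operand stream; the encoding and the arity table are assumptions for illustration, not the real MDExpression layout.

#include <cstdint>
#include <cstdio>
#include <vector>

// Assumed opcodes and arities, for illustration only.
enum Op : uint64_t { OpPlus = 1, OpDeref = 2, OpBitPiece = 3 };

static unsigned arity(uint64_t OpCode) {
  switch (OpCode) {
  case OpPlus:     return 1; // offset
  case OpBitPiece: return 2; // offset, size
  default:         return 0;
  }
}

// Iterator-like cursor over a flat stream laid out as [op, arg0, arg1, ...]*.
class ExprOpCursor {
  const uint64_t *P, *End;

public:
  ExprOpCursor(const uint64_t *P, const uint64_t *End) : P(P), End(End) {}
  bool atEnd() const { return P == End; }
  uint64_t getOp() const { return *P; }
  // Zero-based, like the rewritten I->getArg(0)/getArg(1) calls above.
  uint64_t getArg(unsigned N) const { return P[1 + N]; }
  void advance() { P += 1 + arity(getOp()); }
};

int main() {
  // DW_OP_plus 8, DW_OP_deref, DW_OP_bit_piece 0 32 (hypothetical form).
  std::vector<uint64_t> Expr = {OpPlus, 8, OpDeref, OpBitPiece, 0, 32};
  for (ExprOpCursor I(Expr.data(), Expr.data() + Expr.size()); !I.atEnd();
       I.advance()) {
    switch (I.getOp()) {
    case OpPlus:
      std::printf("plus_uconst %llu\n", (unsigned long long)I.getArg(0));
      break;
    case OpDeref:
      std::printf("deref\n");
      break;
    case OpBitPiece:
      std::printf("bit_piece offset=%llu size=%llu\n",
                  (unsigned long long)I.getArg(0),
                  (unsigned long long)I.getArg(1));
      break;
    }
  }
}
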
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 60acc58e..32adb40 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -139,7 +139,7 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
DIVariable DV = Var->getVariable();
// Variables with positive arg numbers are parameters.
- if (unsigned ArgNum = DV.getArgNumber()) {
+ if (unsigned ArgNum = DV->getArg()) {
// Keep all parameters in order at the start of the variable list to ensure
// function types are correct (no out-of-order parameters)
//
@@ -149,7 +149,7 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
// rather than linear search.
auto I = Vars.begin();
while (I != Vars.end()) {
- unsigned CurNum = (*I)->getVariable().getArgNumber();
+ unsigned CurNum = (*I)->getVariable()->getArg();
// A local (non-parameter) variable has been found, insert immediately
// before it.
if (CurNum == 0)
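
The DwarfFile.cpp hunk keeps the same insertion policy and only renames the accessor (getArgNumber() becomes getArg()): variables with a positive argument number stay at the front of the scope's list in argument order, and plain locals follow. A small stand-alone sketch of that ordering, with a hypothetical Var record and the same linear search:

#include <cstdio>
#include <vector>

// Hypothetical variable record: ArgNum > 0 marks a formal parameter.
struct Var {
  const char *Name;
  unsigned ArgNum; // 0 for local (non-parameter) variables
};

// Insert V into Vars, keeping parameters first and in argument order,
// mirroring the loop in DwarfFile::addScopeVariable.
static void addScopeVariable(std::vector<Var> &Vars, const Var &V) {
  if (unsigned ArgNum = V.ArgNum) {
    auto I = Vars.begin();
    while (I != Vars.end()) {
      // A local has been found: parameters stop here, insert before it.
      if (I->ArgNum == 0)
        break;
      // Keep parameters sorted by argument number.
      if (I->ArgNum > ArgNum)
        break;
      ++I;
    }
    Vars.insert(I, V);
    return;
  }
  // Locals simply go at the end.
  Vars.push_back(V);
}

int main() {
  std::vector<Var> Vars;
  addScopeVariable(Vars, {"local_a", 0});
  addScopeVariable(Vars, {"arg2", 2});
  addScopeVariable(Vars, {"arg1", 1});
  addScopeVariable(Vars, {"local_b", 0});
  for (const Var &V : Vars)
    std::printf("%s\n", V.Name); // arg1 arg2 local_a local_b
}
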
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index f6af73f..9154652 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -175,8 +175,8 @@ static bool isShareableAcrossCUs(DIDescriptor D) {
// level already) but may be implementable for some value in projects
// building multiple independent libraries with LTO and then linking those
// together.
- return (D.isType() ||
- (D.isSubprogram() && !DISubprogram(D).isDefinition())) &&
+ return (isa<MDType>(D) ||
+ (isa<MDSubprogram>(D) && !cast<MDSubprogram>(D)->isDefinition())) &&
!GenerateDwarfTypeUnits;
}
@@ -397,52 +397,48 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File,
/// addSourceLine - Add location information to specified debug information
/// entry.
void DwarfUnit::addSourceLine(DIE &Die, DIVariable V) {
- assert(V.isVariable());
+ assert(V);
- addSourceLine(Die, V.getLineNumber(), V.getContext().getFilename(),
- V.getContext().getDirectory());
+ addSourceLine(Die, V->getLine(), V->getScope()->getFilename(),
+ V->getScope()->getDirectory());
}
/// addSourceLine - Add location information to specified debug information
/// entry.
void DwarfUnit::addSourceLine(DIE &Die, DIGlobalVariable G) {
- assert(G.isGlobalVariable());
+ assert(G);
- addSourceLine(Die, G.getLineNumber(), G.getFilename(), G.getDirectory());
+ addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory());
}
/// addSourceLine - Add location information to specified debug information
/// entry.
void DwarfUnit::addSourceLine(DIE &Die, DISubprogram SP) {
- assert(SP.isSubprogram());
+ assert(SP);
- addSourceLine(Die, SP.getLineNumber(), SP.getFilename(), SP.getDirectory());
+ addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory());
}
/// addSourceLine - Add location information to specified debug information
/// entry.
void DwarfUnit::addSourceLine(DIE &Die, DIType Ty) {
- assert(Ty.isType());
+ assert(Ty);
- addSourceLine(Die, Ty.getLineNumber(), Ty.getFilename(), Ty.getDirectory());
+ addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory());
}
/// addSourceLine - Add location information to specified debug information
/// entry.
void DwarfUnit::addSourceLine(DIE &Die, DIObjCProperty Ty) {
- assert(Ty.isObjCProperty());
+ assert(Ty);
- DIFile File = Ty.getFile();
- addSourceLine(Die, Ty.getLineNumber(), File.getFilename(),
- File.getDirectory());
+ addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory());
}
/// addSourceLine - Add location information to specified debug information
/// entry.
void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) {
- assert(NS.Verify());
-
- addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory());
+ addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory());
}
/// addRegisterOp - Add register operand.
@@ -525,28 +521,26 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
const MachineLocation &Location) {
DIType Ty = DV.getType();
DIType TmpTy = Ty;
- uint16_t Tag = Ty.getTag();
+ uint16_t Tag = Ty->getTag();
bool isPointer = false;
StringRef varName = DV.getName();
if (Tag == dwarf::DW_TAG_pointer_type) {
- DIDerivedType DTy(Ty);
- TmpTy = resolve(DTy.getTypeDerivedFrom());
+ DIDerivedType DTy = cast<MDDerivedType>(Ty);
+ TmpTy = resolve(DTy->getBaseType());
isPointer = true;
}
- DICompositeType blockStruct(TmpTy);
-
// Find the __forwarding field and the variable field in the __Block_byref
// struct.
- DIArray Fields = blockStruct.getElements();
+ DIArray Fields = cast<MDCompositeTypeBase>(TmpTy)->getElements();
DIDerivedType varField;
DIDerivedType forwardingField;
- for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
- DIDerivedType DT(Fields.getElement(i));
- StringRef fieldName = DT.getName();
+ for (unsigned i = 0, N = Fields.size(); i < N; ++i) {
+ DIDerivedType DT = cast<MDDerivedTypeBase>(Fields[i]);
+ StringRef fieldName = DT->getName();
if (fieldName == "__forwarding")
forwardingField = DT;
else if (fieldName == varName)
@@ -554,8 +548,8 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
}
// Get the offsets for the forwarding field and the variable field.
- unsigned forwardingFieldOffset = forwardingField.getOffsetInBits() >> 3;
- unsigned varFieldOffset = varField.getOffsetInBits() >> 2;
+ unsigned forwardingFieldOffset = forwardingField->getOffsetInBits() >> 3;
+ unsigned varFieldOffset = varField->getOffsetInBits() >> 2;
// Decode the original location, and use that as the start of the byref
// variable's location.
@@ -601,9 +595,8 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
/// Return true if type encoding is unsigned.
static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) {
- DIDerivedType DTy(Ty);
- if (DTy.isDerivedType()) {
- dwarf::Tag T = (dwarf::Tag)Ty.getTag();
+ if (DIDerivedType DTy = dyn_cast<MDDerivedTypeBase>(Ty)) {
+ dwarf::Tag T = (dwarf::Tag)Ty->getTag();
// Encode pointer constants as unsigned bytes. This is used at least for
// null pointer constant emission.
// (Pieces of) aggregate types that get hacked apart by SROA may also be
@@ -624,56 +617,55 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) {
T == dwarf::DW_TAG_volatile_type ||
T == dwarf::DW_TAG_restrict_type ||
T == dwarf::DW_TAG_enumeration_type);
- if (DITypeRef Deriv = DTy.getTypeDerivedFrom())
+ if (DITypeRef Deriv = DTy->getBaseType())
return isUnsignedDIType(DD, DD->resolve(Deriv));
// FIXME: Enums without a fixed underlying type have unknown signedness
// here, leading to incorrectly emitted constants.
- assert(DTy.getTag() == dwarf::DW_TAG_enumeration_type);
+ assert(DTy->getTag() == dwarf::DW_TAG_enumeration_type);
return false;
}
- DIBasicType BTy(Ty);
- assert(BTy.isBasicType());
- unsigned Encoding = BTy.getEncoding();
+ DIBasicType BTy = cast<MDBasicType>(Ty);
+ unsigned Encoding = BTy->getEncoding();
assert((Encoding == dwarf::DW_ATE_unsigned ||
Encoding == dwarf::DW_ATE_unsigned_char ||
Encoding == dwarf::DW_ATE_signed ||
Encoding == dwarf::DW_ATE_signed_char ||
- Encoding == dwarf::DW_ATE_float ||
- Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
- (Ty.getTag() == dwarf::DW_TAG_unspecified_type &&
- Ty.getName() == "decltype(nullptr)")) &&
+ Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF ||
+ Encoding == dwarf::DW_ATE_boolean ||
+ (Ty->getTag() == dwarf::DW_TAG_unspecified_type &&
+ Ty->getName() == "decltype(nullptr)")) &&
"Unsupported encoding");
- return (Encoding == dwarf::DW_ATE_unsigned ||
- Encoding == dwarf::DW_ATE_unsigned_char ||
- Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
- Ty.getTag() == dwarf::DW_TAG_unspecified_type);
+ return Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
+ Ty->getTag() == dwarf::DW_TAG_unspecified_type;
}
/// If this type is derived from a base type then return base type size.
static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) {
- unsigned Tag = Ty.getTag();
+ unsigned Tag = Ty->getTag();
if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
Tag != dwarf::DW_TAG_restrict_type)
- return Ty.getSizeInBits();
+ return Ty->getSizeInBits();
- DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom());
+ auto *BaseType = DD->resolve(Ty->getBaseType());
- assert(BaseType.isValid() && "Unexpected invalid base type");
+ assert(BaseType && "Unexpected invalid base type");
// If this is a derived type, go ahead and get the base type, unless it's a
// reference then it's just the size of the field. Pointer types have no need
// of this since they're a different type of qualification on the type.
- if (BaseType.getTag() == dwarf::DW_TAG_reference_type ||
- BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type)
- return Ty.getSizeInBits();
+ if (BaseType->getTag() == dwarf::DW_TAG_reference_type ||
+ BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type)
+ return Ty->getSizeInBits();
- if (BaseType.isDerivedType())
- return getBaseTypeSize(DD, DIDerivedType(BaseType));
+ if (auto *DT = dyn_cast<MDDerivedTypeBase>(BaseType))
+ return getBaseTypeSize(DD, DT);
- return BaseType.getSizeInBits();
+ return BaseType->getSizeInBits();
}
/// addConstantFPValue - Add constant value entry in variable DIE.
@@ -771,39 +763,37 @@ void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
/// addTemplateParams - Add template parameters into buffer.
void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
// Add template parameters.
- for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
- DIDescriptor Element = TParams.getElement(i);
- if (Element.isTemplateTypeParameter())
- constructTemplateTypeParameterDIE(Buffer,
- DITemplateTypeParameter(Element));
- else if (Element.isTemplateValueParameter())
- constructTemplateValueParameterDIE(Buffer,
- DITemplateValueParameter(Element));
+ for (unsigned i = 0, e = TParams.size(); i != e; ++i) {
+ DIDescriptor Element = TParams[i];
+ if (auto *TTP = dyn_cast<MDTemplateTypeParameter>(Element))
+ constructTemplateTypeParameterDIE(Buffer, TTP);
+ else if (auto *TVP = dyn_cast<MDTemplateValueParameter>(Element))
+ constructTemplateValueParameterDIE(Buffer, TVP);
}
}
/// getOrCreateContextDIE - Get context owner's DIE.
DIE *DwarfUnit::getOrCreateContextDIE(DIScope Context) {
- if (!Context || Context.isFile())
+ if (!Context || isa<MDFile>(Context))
return &getUnitDie();
- if (Context.isType())
- return getOrCreateTypeDIE(DIType(Context));
- if (Context.isNameSpace())
- return getOrCreateNameSpace(DINameSpace(Context));
- if (Context.isSubprogram())
- return getOrCreateSubprogramDIE(DISubprogram(Context));
+ if (auto *T = dyn_cast<MDType>(Context))
+ return getOrCreateTypeDIE(T);
+ if (auto *NS = dyn_cast<MDNamespace>(Context))
+ return getOrCreateNameSpace(NS);
+ if (auto *SP = dyn_cast<MDSubprogram>(Context))
+ return getOrCreateSubprogramDIE(SP);
return getDIE(Context);
}
DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) {
- DIScope Context = resolve(Ty.getContext());
+ DIScope Context = resolve(Ty->getScope());
DIE *ContextDIE = getOrCreateContextDIE(Context);
if (DIE *TyDIE = getDIE(Ty))
return TyDIE;
// Create new type.
- DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
+ DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty);
constructTypeDIE(TyDIE, Ty);
@@ -817,18 +807,18 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
if (!TyNode)
return nullptr;
- DIType Ty(TyNode);
- assert(Ty.isType());
- assert(Ty == resolve(Ty.getRef()) &&
+ auto *Ty = cast<MDType>(TyNode);
+ assert(Ty == resolve(Ty->getRef()) &&
"type was not uniqued, possible ODR violation.");
// DW_TAG_restrict_type is not supported in DWARF2
- if (Ty.getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
- return getOrCreateTypeDIE(resolve(DIDerivedType(Ty).getTypeDerivedFrom()));
+ if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
+ return getOrCreateTypeDIE(
+ resolve(DITypeRef(cast<MDDerivedType>(Ty)->getBaseType())));
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
- DIScope Context = resolve(Ty.getContext());
+ DIScope Context = resolve(Ty->getScope());
DIE *ContextDIE = getOrCreateContextDIE(Context);
assert(ContextDIE);
@@ -836,24 +826,22 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
return TyDIE;
// Create new type.
- DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
+ DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty);
updateAcceleratorTables(Context, Ty, TyDIE);
- if (Ty.isBasicType())
- constructTypeDIE(TyDIE, DIBasicType(Ty));
- else if (Ty.isCompositeType()) {
- DICompositeType CTy(Ty);
- if (GenerateDwarfTypeUnits && !Ty.isForwardDecl())
- if (MDString *TypeId = CTy.getIdentifier()) {
+ if (auto *BT = dyn_cast<MDBasicType>(Ty))
+ constructTypeDIE(TyDIE, BT);
+ else if (DICompositeType CTy = dyn_cast<MDCompositeTypeBase>(Ty)) {
+ if (GenerateDwarfTypeUnits && !Ty->isForwardDecl())
+ if (MDString *TypeId = CTy->getRawIdentifier()) {
DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
// Skip updating the accelerator tables since this is not the full type.
return &TyDIE;
}
constructTypeDIE(TyDIE, CTy);
} else {
- assert(Ty.isDerivedType() && "Unknown kind of DIType");
- constructTypeDIE(TyDIE, DIDerivedType(Ty));
+ constructTypeDIE(TyDIE, cast<MDDerivedType>(Ty));
}
return &TyDIE;
@@ -861,19 +849,18 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty,
const DIE &TyDIE) {
- if (!Ty.getName().empty() && !Ty.isForwardDecl()) {
+ if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
bool IsImplementation = 0;
- if (Ty.isCompositeType()) {
- DICompositeType CT(Ty);
+ if (auto *CT = dyn_cast<MDCompositeTypeBase>(Ty)) {
// A runtime language of 0 actually means C/C++ and that any
// non-negative value is some version of Objective-C/C++.
- IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete();
+ IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
}
unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
- DD->addAccelType(Ty.getName(), TyDIE, Flags);
+ DD->addAccelType(Ty->getName(), TyDIE, Flags);
- if (!Context || Context.isCompileUnit() || Context.isFile() ||
- Context.isNameSpace())
+ if (!Context || isa<MDCompileUnit>(Context) || isa<MDFile>(Context) ||
+ isa<MDNamespace>(Context))
addGlobalType(Ty, TyDIE, Context);
}
}
@@ -914,10 +901,10 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const {
std::string CS;
SmallVector<DIScope, 1> Parents;
- while (!Context.isCompileUnit()) {
+ while (!isa<MDCompileUnit>(Context)) {
Parents.push_back(Context);
- if (Context.getContext())
- Context = resolve(Context.getContext());
+ if (Context->getScope())
+ Context = resolve(Context->getScope());
else
// Structure, etc types will have a NULL context if they're at the top
// level.
@@ -929,9 +916,9 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const {
for (SmallVectorImpl<DIScope>::reverse_iterator I = Parents.rbegin(),
E = Parents.rend();
I != E; ++I) {
- DIScope Ctx = *I;
- StringRef Name = Ctx.getName();
- if (Name.empty() && Ctx.isNameSpace())
+ const MDScope *Ctx = *I;
+ StringRef Name = Ctx->getName();
+ if (Name.empty() && isa<MDNamespace>(Ctx))
Name = "(anonymous namespace)";
if (!Name.empty()) {
CS += Name;
@@ -944,31 +931,31 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const {
/// constructTypeDIE - Construct basic type die from DIBasicType.
void DwarfUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
// Get core information.
- StringRef Name = BTy.getName();
+ StringRef Name = BTy->getName();
// Add name if not anonymous or intermediate type.
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
// An unspecified type only has a name attribute.
- if (BTy.getTag() == dwarf::DW_TAG_unspecified_type)
+ if (BTy->getTag() == dwarf::DW_TAG_unspecified_type)
return;
addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- BTy.getEncoding());
+ BTy->getEncoding());
- uint64_t Size = BTy.getSizeInBits() >> 3;
+ uint64_t Size = BTy->getSizeInBits() >> 3;
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
}
/// constructTypeDIE - Construct derived type die from DIDerivedType.
void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
// Get core information.
- StringRef Name = DTy.getName();
- uint64_t Size = DTy.getSizeInBits() >> 3;
+ StringRef Name = DTy->getName();
+ uint64_t Size = DTy->getSizeInBits() >> 3;
uint16_t Tag = Buffer.getTag();
// Map to main type, void will not have a type.
- DIType FromTy = resolve(DTy.getTypeDerivedFrom());
+ DIType FromTy = resolve(DTy->getBaseType());
if (FromTy)
addType(Buffer, FromTy);
@@ -982,24 +969,25 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
if (Tag == dwarf::DW_TAG_ptr_to_member_type)
- addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
- *getOrCreateTypeDIE(resolve(DTy.getClassType())));
+ addDIEEntry(
+ Buffer, dwarf::DW_AT_containing_type,
+ *getOrCreateTypeDIE(resolve(cast<MDDerivedType>(DTy)->getClassType())));
// Add source line info if available and TyDesc is not a forward declaration.
- if (!DTy.isForwardDecl())
+ if (!DTy->isForwardDecl())
addSourceLine(Buffer, DTy);
}
/// constructSubprogramArguments - Construct function argument DIEs.
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) {
- for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
- DIType Ty = resolve(Args.getElement(i));
+ for (unsigned i = 1, N = Args.size(); i < N; ++i) {
+ DIType Ty = resolve(Args[i]);
if (!Ty) {
assert(i == N-1 && "Unspecified parameter must be the last argument");
createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
} else {
DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
addType(Arg, Ty);
- if (Ty.isArtificial())
+ if (Ty->isArtificial())
addFlag(Arg, dwarf::DW_AT_artificial);
}
}
@@ -1008,9 +996,9 @@ void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) {
/// constructTypeDIE - Construct type DIE from DICompositeType.
void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add name if not anonymous or intermediate type.
- StringRef Name = CTy.getName();
+ StringRef Name = CTy->getName();
- uint64_t Size = CTy.getSizeInBits() >> 3;
+ uint64_t Size = CTy->getSizeInBits() >> 3;
uint16_t Tag = Buffer.getTag();
switch (Tag) {
@@ -1022,14 +1010,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
break;
case dwarf::DW_TAG_subroutine_type: {
// Add return type. A void return won't have a type.
- DITypeArray Elements = DISubroutineType(CTy).getTypeArray();
- DIType RTy(resolve(Elements.getElement(0)));
- if (RTy)
- addType(Buffer, RTy);
+ auto Elements = cast<MDSubroutineType>(CTy)->getTypeArray();
+ if (Elements.size())
+ if (auto RTy = resolve(Elements[0]))
+ addType(Buffer, RTy);
bool isPrototyped = true;
- if (Elements.getNumElements() == 2 &&
- !Elements.getElement(1))
+ if (Elements.size() == 2 && !Elements[1])
isPrototyped = false;
constructSubprogramArguments(Buffer, Elements);
@@ -1042,60 +1029,46 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
Language == dwarf::DW_LANG_ObjC))
addFlag(Buffer, dwarf::DW_AT_prototyped);
- if (CTy.isLValueReference())
+ if (CTy->isLValueReference())
addFlag(Buffer, dwarf::DW_AT_reference);
- if (CTy.isRValueReference())
+ if (CTy->isRValueReference())
addFlag(Buffer, dwarf::DW_AT_rvalue_reference);
} break;
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_class_type: {
// Add elements to structure type.
- DIArray Elements = CTy.getElements();
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDescriptor Element = Elements.getElement(i);
- if (Element.isSubprogram())
- getOrCreateSubprogramDIE(DISubprogram(Element));
- else if (Element.isDerivedType()) {
- DIDerivedType DDTy(Element);
- if (DDTy.getTag() == dwarf::DW_TAG_friend) {
+ DIArray Elements = CTy->getElements();
+ for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ DIDescriptor Element = Elements[i];
+ if (!Element)
+ continue;
+ if (auto *SP = dyn_cast<MDSubprogram>(Element))
+ getOrCreateSubprogramDIE(SP);
+ else if (DIDerivedType DDTy = dyn_cast<MDDerivedTypeBase>(Element)) {
+ if (DDTy->getTag() == dwarf::DW_TAG_friend) {
DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
- addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()),
- dwarf::DW_AT_friend);
- } else if (DDTy.isStaticMember()) {
+ addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend);
+ } else if (DDTy->isStaticMember()) {
getOrCreateStaticMemberDIE(DDTy);
} else {
constructMemberDIE(Buffer, DDTy);
}
- } else if (Element.isObjCProperty()) {
- DIObjCProperty Property(Element);
- DIE &ElemDie = createAndAddDIE(Property.getTag(), Buffer);
- StringRef PropertyName = Property.getObjCPropertyName();
+ } else if (DIObjCProperty Property = dyn_cast<MDObjCProperty>(Element)) {
+ DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer);
+ StringRef PropertyName = Property->getName();
addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
- if (Property.getType())
- addType(ElemDie, Property.getType());
+ if (Property->getType())
+ addType(ElemDie, Property->getType());
addSourceLine(ElemDie, Property);
- StringRef GetterName = Property.getObjCPropertyGetterName();
+ StringRef GetterName = Property->getGetterName();
if (!GetterName.empty())
addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
- StringRef SetterName = Property.getObjCPropertySetterName();
+ StringRef SetterName = Property->getSetterName();
if (!SetterName.empty())
addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
- unsigned PropertyAttributes = 0;
- if (Property.isReadOnlyObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
- if (Property.isReadWriteObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
- if (Property.isAssignObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
- if (Property.isRetainObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
- if (Property.isCopyObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
- if (Property.isNonAtomicObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
- if (PropertyAttributes)
+ if (unsigned PropertyAttributes = Property->getAttributes())
addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None,
PropertyAttributes);
@@ -1104,28 +1077,27 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
Entry = createDIEEntry(ElemDie);
insertDIEEntry(Element, Entry);
}
- } else
- continue;
+ }
}
- if (CTy.isAppleBlockExtension())
+ if (CTy->isAppleBlockExtension())
addFlag(Buffer, dwarf::DW_AT_APPLE_block);
// This is outside the DWARF spec, but GDB expects a DW_AT_containing_type
// inside C++ composite types to point to the base class with the vtable.
- DICompositeType ContainingType(resolve(CTy.getContainingType()));
- if (ContainingType)
+ if (DICompositeType ContainingType =
+ dyn_cast_or_null<MDCompositeType>(resolve(CTy->getVTableHolder())))
addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
*getOrCreateTypeDIE(ContainingType));
- if (CTy.isObjcClassComplete())
+ if (CTy->isObjcClassComplete())
addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
// Add template parameters to a class, structure or union types.
// FIXME: The support isn't in the metadata for this yet.
if (Tag == dwarf::DW_TAG_class_type ||
Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
- addTemplateParams(Buffer, CTy.getTemplateParams());
+ addTemplateParams(Buffer, CTy->getTemplateParams());
break;
}
@@ -1144,20 +1116,20 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// TODO: Do we care about size for enum forward declarations?
if (Size)
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
- else if (!CTy.isForwardDecl())
+ else if (!CTy->isForwardDecl())
// Add zero size if it is not a forward declaration.
addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0);
// If we're a forward decl, say so.
- if (CTy.isForwardDecl())
+ if (CTy->isForwardDecl())
addFlag(Buffer, dwarf::DW_AT_declaration);
// Add source line info if available.
- if (!CTy.isForwardDecl())
+ if (!CTy->isForwardDecl())
addSourceLine(Buffer, CTy);
// No harm in adding the runtime language to the declaration.
- unsigned RLang = CTy.getRunTimeLang();
+ unsigned RLang = CTy->getRuntimeLang();
if (RLang)
addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
RLang);
@@ -1171,10 +1143,10 @@ void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer,
DIE &ParamDIE =
createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
// Add the type if it exists, it could be void and therefore no type.
- if (TP.getType())
- addType(ParamDIE, resolve(TP.getType()));
- if (!TP.getName().empty())
- addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
+ if (TP->getType())
+ addType(ParamDIE, resolve(TP->getType()));
+ if (!TP->getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, TP->getName());
}
/// constructTemplateValueParameterDIE - Construct new DIE for the given
@@ -1182,17 +1154,17 @@ void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer,
void
DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
DITemplateValueParameter VP) {
- DIE &ParamDIE = createAndAddDIE(VP.getTag(), Buffer);
+ DIE &ParamDIE = createAndAddDIE(VP->getTag(), Buffer);
// Add the type if there is one, template template and template parameter
// packs will not have a type.
- if (VP.getTag() == dwarf::DW_TAG_template_value_parameter)
- addType(ParamDIE, resolve(VP.getType()));
- if (!VP.getName().empty())
- addString(ParamDIE, dwarf::DW_AT_name, VP.getName());
- if (Metadata *Val = VP.getValue()) {
+ if (VP->getTag() == dwarf::DW_TAG_template_value_parameter)
+ addType(ParamDIE, resolve(VP->getType()));
+ if (!VP->getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, VP->getName());
+ if (Metadata *Val = VP->getValue()) {
if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
- addConstantValue(ParamDIE, CI, resolve(VP.getType()));
+ addConstantValue(ParamDIE, CI, resolve(VP->getType()));
else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) {
// For declaration non-type template parameters (such as global values and
// functions)
@@ -1202,14 +1174,12 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
// parameter, rather than a pointer to it.
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
addBlock(ParamDIE, dwarf::DW_AT_location, Loc);
- } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
+ } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) {
assert(isa<MDString>(Val));
addString(ParamDIE, dwarf::DW_AT_GNU_template_name,
cast<MDString>(Val)->getString());
- } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
- assert(isa<MDNode>(Val));
- DIArray A(cast<MDNode>(Val));
- addTemplateParams(ParamDIE, A);
+ } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
+ addTemplateParams(ParamDIE, cast<MDTuple>(Val));
}
}
}
@@ -1218,19 +1188,19 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(NS.getContext());
+ DIE *ContextDIE = getOrCreateContextDIE(NS->getScope());
if (DIE *NDie = getDIE(NS))
return NDie;
DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
- StringRef Name = NS.getName();
+ StringRef Name = NS->getName();
if (!Name.empty())
- addString(NDie, dwarf::DW_AT_name, NS.getName());
+ addString(NDie, dwarf::DW_AT_name, NS->getName());
else
Name = "(anonymous namespace)";
DD->addAccelNamespace(Name, NDie);
- addGlobalName(Name, NDie, NS.getContext());
+ addGlobalName(Name, NDie, NS->getScope());
addSourceLine(NDie, NS);
return &NDie;
}
@@ -1241,12 +1211,12 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) {
// such construction creates the DIE (as is the case for member function
// declarations).
DIE *ContextDIE =
- Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP.getContext()));
+ Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP->getScope()));
if (DIE *SPDie = getDIE(SP))
return SPDie;
- if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
+ if (auto *SPDecl = SP->getDeclaration()) {
if (!Minimal) {
// Add subprogram definitions to the CU die directly.
ContextDIE = &getUnitDie();
@@ -1260,7 +1230,7 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) {
// Stop here and fill this in later, depending on whether or not this
// subprogram turns out to have inlined instances or not.
- if (SP.isDefinition())
+ if (SP->isDefinition())
return &SPDie;
applySubprogramAttributes(SP, SPDie);
@@ -1271,19 +1241,19 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP,
DIE &SPDie) {
DIE *DeclDie = nullptr;
StringRef DeclLinkageName;
- if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
+ if (auto *SPDecl = SP->getDeclaration()) {
DeclDie = getDIE(SPDecl);
assert(DeclDie && "This DIE should've already been constructed when the "
"definition DIE was created in "
"getOrCreateSubprogramDIE");
- DeclLinkageName = SPDecl.getLinkageName();
+ DeclLinkageName = SPDecl->getLinkageName();
}
// Add function template parameters.
- addTemplateParams(SPDie, SP.getTemplateParams());
+ addTemplateParams(SPDie, SP->getTemplateParams());
// Add the linkage name if we have one and it isn't in the Decl.
- StringRef LinkageName = SP.getLinkageName();
+ StringRef LinkageName = SP->getLinkageName();
assert(((LinkageName.empty() || DeclLinkageName.empty()) ||
LinkageName == DeclLinkageName) &&
"decl has a linkage name and it is different");
@@ -1306,8 +1276,8 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
return;
// Constructors and operators for anonymous aggregates do not have names.
- if (!SP.getName().empty())
- addString(SPDie, dwarf::DW_AT_name, SP.getName());
+ if (!SP->getName().empty())
+ addString(SPDie, dwarf::DW_AT_name, SP->getName());
// Skip the rest of the attributes under -gmlt to save space.
if (Minimal)
@@ -1318,33 +1288,34 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
// Add the prototype if we have a prototype and we have a C like
// language.
uint16_t Language = getLanguage();
- if (SP.isPrototyped() &&
+ if (SP->isPrototyped() &&
(Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
Language == dwarf::DW_LANG_ObjC))
addFlag(SPDie, dwarf::DW_AT_prototyped);
- DISubroutineType SPTy = SP.getType();
- assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type &&
+ DISubroutineType SPTy = SP->getType();
+ assert(SPTy->getTag() == dwarf::DW_TAG_subroutine_type &&
"the type of a subprogram should be a subroutine");
- DITypeArray Args = SPTy.getTypeArray();
+ auto Args = SPTy->getTypeArray();
// Add a return type. If this is a type like a C/C++ void type we don't add a
// return type.
- if (resolve(Args.getElement(0)))
- addType(SPDie, DIType(resolve(Args.getElement(0))));
+ if (Args.size())
+ if (auto Ty = resolve(Args[0]))
+ addType(SPDie, Ty);
- unsigned VK = SP.getVirtuality();
+ unsigned VK = SP->getVirtuality();
if (VK) {
addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
DIELoc *Block = getDIELoc();
addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- addUInt(*Block, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+ addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
ContainingTypeMap.insert(
- std::make_pair(&SPDie, resolve(SP.getContainingType())));
+ std::make_pair(&SPDie, resolve(SP->getContainingType())));
}
- if (!SP.isDefinition()) {
+ if (!SP->isDefinition()) {
addFlag(SPDie, dwarf::DW_AT_declaration);
// Add arguments. Do not add arguments for subprogram definition. They will
@@ -1352,35 +1323,35 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
constructSubprogramArguments(SPDie, Args);
}
- if (SP.isArtificial())
+ if (SP->isArtificial())
addFlag(SPDie, dwarf::DW_AT_artificial);
- if (!SP.isLocalToUnit())
+ if (!SP->isLocalToUnit())
addFlag(SPDie, dwarf::DW_AT_external);
- if (SP.isOptimized())
+ if (SP->isOptimized())
addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
if (unsigned isa = Asm->getISAEncoding())
addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
- if (SP.isLValueReference())
+ if (SP->isLValueReference())
addFlag(SPDie, dwarf::DW_AT_reference);
- if (SP.isRValueReference())
+ if (SP->isRValueReference())
addFlag(SPDie, dwarf::DW_AT_rvalue_reference);
- if (SP.isProtected())
+ if (SP->isProtected())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
- else if (SP.isPrivate())
+ else if (SP->isPrivate())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
- else if (SP.isPublic())
+ else if (SP->isPublic())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
- if (SP.isExplicit())
+ if (SP->isExplicit())
addFlag(SPDie, dwarf::DW_AT_explicit);
}
@@ -1393,9 +1364,9 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
// C/C++. The Count value is the number of elements. Values are 64 bit. If
// Count == -1 then the array is unbounded and we do not emit
// DW_AT_lower_bound and DW_AT_count attributes.
- int64_t LowerBound = SR.getLo();
+ int64_t LowerBound = SR->getLowerBound();
int64_t DefaultLowerBound = getDefaultLowerBound();
- int64_t Count = SR.getCount();
+ int64_t Count = SR->getCount();
if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound);
@@ -1420,11 +1391,11 @@ DIE *DwarfUnit::getIndexTyDie() {
/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
- if (CTy.isVector())
+ if (CTy->isVector())
addFlag(Buffer, dwarf::DW_AT_GNU_vector);
// Emit the element type.
- addType(Buffer, resolve(CTy.getTypeDerivedFrom()));
+ addType(Buffer, resolve(CTy->getBaseType()));
// Get an anonymous type for index type.
// FIXME: This type should be passed down from the front end
@@ -1432,31 +1403,32 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
DIE *IdxTy = getIndexTyDie();
// Add subranges to array type.
- DIArray Elements = CTy.getElements();
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDescriptor Element = Elements.getElement(i);
- if (Element.getTag() == dwarf::DW_TAG_subrange_type)
- constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+ DIArray Elements = CTy->getElements();
+ for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ // FIXME: Should this really be such a loose cast?
+ if (auto *Element = dyn_cast_or_null<DebugNode>(Elements[i]))
+ if (Element->getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, cast<MDSubrange>(Element), IdxTy);
}
}
/// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType.
void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
- DIArray Elements = CTy.getElements();
+ DIArray Elements = CTy->getElements();
// Add enumerators to enumeration type.
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIEnumerator Enum(Elements.getElement(i));
- if (Enum.isEnumerator()) {
+ for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ auto *Enum = dyn_cast_or_null<MDEnumerator>(Elements[i]);
+ if (Enum) {
DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
- StringRef Name = Enum.getName();
+ StringRef Name = Enum->getName();
addString(Enumerator, dwarf::DW_AT_name, Name);
- int64_t Value = Enum.getEnumValue();
+ int64_t Value = Enum->getValue();
addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
Value);
}
}
- DIType DTy = resolve(CTy.getTypeDerivedFrom());
+ DIType DTy = resolve(CTy->getBaseType());
if (DTy) {
addType(Buffer, DTy);
addFlag(Buffer, dwarf::DW_AT_enum_class);
@@ -1481,17 +1453,20 @@ void DwarfUnit::constructContainingTypeDIEs() {
}
/// constructMemberDIE - Construct member DIE from DIDerivedType.
-void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
- DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer);
- StringRef Name = DT.getName();
+void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT_) {
+ // Downcast to MDDerivedType.
+ const MDDerivedType *DT = cast<MDDerivedType>(DT_);
+
+ DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer);
+ StringRef Name = DT->getName();
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, Name);
- addType(MemberDie, resolve(DT.getTypeDerivedFrom()));
+ addType(MemberDie, resolve(DT->getBaseType()));
addSourceLine(MemberDie, DT);
- if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) {
+ if (DT->getTag() == dwarf::DW_TAG_inheritance && DT->isVirtual()) {
// For C++, virtual base classes are not at a fixed offset. Use the following
// expression to extract the appropriate offset from the vtable.
@@ -1501,14 +1476,14 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT->getOffsetInBits());
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie);
} else {
- uint64_t Size = DT.getSizeInBits();
+ uint64_t Size = DT->getSizeInBits();
uint64_t FieldSize = getBaseTypeSize(DD, DT);
uint64_t OffsetInBytes;
@@ -1517,8 +1492,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
- uint64_t Offset = DT.getOffsetInBits();
- uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t Offset = DT->getOffsetInBits();
+ uint64_t AlignMask = ~(DT->getAlignInBits() - 1);
uint64_t HiMark = (Offset + FieldSize) & AlignMask;
uint64_t FieldOffset = (HiMark - FieldSize);
Offset -= FieldOffset;
@@ -1533,7 +1508,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
OffsetInBytes = FieldOffset >> 3;
} else
// This is not a bitfield.
- OffsetInBytes = DT.getOffsetInBits() >> 3;
+ OffsetInBytes = DT->getOffsetInBits() >> 3;
if (DD->getDwarfVersion() <= 2) {
DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc();
@@ -1545,49 +1520,50 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
OffsetInBytes);
}
- if (DT.isProtected())
+ if (DT->isProtected())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
- else if (DT.isPrivate())
+ else if (DT->isPrivate())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
- else if (DT.isPublic())
+ else if (DT->isPublic())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
- if (DT.isVirtual())
+ if (DT->isVirtual())
addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
dwarf::DW_VIRTUALITY_virtual);
// Objective-C properties.
- if (MDNode *PNode = DT.getObjCProperty())
+ if (MDNode *PNode = DT->getObjCProperty())
if (DIEEntry *PropertyDie = getDIEEntry(PNode))
MemberDie.addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
PropertyDie);
- if (DT.isArtificial())
+ if (DT->isArtificial())
addFlag(MemberDie, dwarf::DW_AT_artificial);
}
/// getOrCreateStaticMemberDIE - Create new DIE for C++ static member.
-DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
- if (!DT.Verify())
+DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT_) {
+ const MDDerivedType *DT = cast_or_null<MDDerivedType>(DT_);
+ if (!DT)
return nullptr;
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(resolve(DT.getContext()));
+ DIE *ContextDIE = getOrCreateContextDIE(resolve(DT->getScope()));
assert(dwarf::isType(ContextDIE->getTag()) &&
"Static member should belong to a type.");
if (DIE *StaticMemberDIE = getDIE(DT))
return StaticMemberDIE;
- DIE &StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT);
+ DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT);
- DIType Ty = resolve(DT.getTypeDerivedFrom());
+ DIType Ty = resolve(DT->getBaseType());
- addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName());
+ addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName());
addType(StaticMemberDIE, Ty);
addSourceLine(StaticMemberDIE, DT);
addFlag(StaticMemberDIE, dwarf::DW_AT_external);
@@ -1595,19 +1571,19 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
// FIXME: We could omit private if the parent is a class_type, and
// public if the parent is something else.
- if (DT.isProtected())
+ if (DT->isProtected())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
- else if (DT.isPrivate())
+ else if (DT->isPrivate())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
- else if (DT.isPublic())
+ else if (DT->isPublic())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
- if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant()))
+ if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant()))
addConstantValue(StaticMemberDIE, CI, Ty);
- if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant()))
+ if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant()))
addConstantFPValue(StaticMemberDIE, CFP);
return &StaticMemberDIE;
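The DwarfUnit.cpp hunks above apply one mechanical migration throughout the file: the old DIDescriptor-style wrappers (value semantics, dot calls, isFoo() predicates) give way to pointers to the new MD* metadata nodes (arrow calls, isa/dyn_cast). A rough before/after sketch, distilled from the hunks above rather than copied verbatim from the patch:

    // Old style: wrapper objects plus type predicates.
    if (Ty.isCompositeType()) {
      DICompositeType CTy(Ty);
      unsigned Lang = CTy.getRunTimeLang();
    }

    // New style: MD* node pointers plus isa/dyn_cast.
    if (auto *CTy = dyn_cast<MDCompositeTypeBase>(Ty)) {
      unsigned Lang = CTy->getRuntimeLang();
    }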
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 81c5821..b354255 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -141,7 +141,7 @@ public:
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
unsigned getUniqueID() const { return UniqueID; }
- uint16_t getLanguage() const { return CUNode.getLanguage(); }
+ uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
DICompileUnit getCUNode() const { return CUNode; }
DIE &getUnitDie() { return UnitDie; }
@@ -342,7 +342,7 @@ protected:
/// resolve - Look in the DwarfDebug map for the MDNode that
/// corresponds to the reference.
- template <typename T> T resolve(DIRef<T> Ref) const {
+ template <typename T> T *resolve(TypedDebugNodeRef<T> Ref) const {
return DD->resolve(Ref);
}
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 14df4c9..6f64d8f 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -188,20 +188,12 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) {
return MarkedNoUnwind;
}
-/// Compute the call-site table. The entry for an invoke has a try-range
-/// containing the call, a non-zero landing pad, and an appropriate action. The
-/// entry for an ordinary call has a try-range containing the call and zero for
-/// the landing pad and the action. Calls marked 'nounwind' have no entry and
-/// must not be contained in the try-range of any entry - they form gaps in the
-/// table. Entries must be ordered by try-range address.
-void EHStreamer::
-computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
- const SmallVectorImpl<unsigned> &FirstActions) {
+void EHStreamer::computePadMap(
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ RangeMapType &PadMap) {
// Invokes and nounwind calls have entries in PadMap (due to being bracketed
// by try-range labels when lowered). Ordinary calls do not, so appropriate
// try-ranges for them need be deduced so we can put them in the LSDA.
- RangeMapType PadMap;
for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
const LandingPadInfo *LandingPad = LandingPads[i];
for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
@@ -211,6 +203,20 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
PadMap[BeginLabel] = P;
}
}
+}
+
+/// Compute the call-site table. The entry for an invoke has a try-range
+/// containing the call, a non-zero landing pad, and an appropriate action. The
+/// entry for an ordinary call has a try-range containing the call and zero for
+/// the landing pad and the action. Calls marked 'nounwind' have no entry and
+/// must not be contained in the try-range of any entry - they form gaps in the
+/// table. Entries must be ordered by try-range address.
+void EHStreamer::
+computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ RangeMapType PadMap;
+ computePadMap(LandingPads, PadMap);
// The end label of the previous invoke or nounwind try-range.
MCSymbol *LastLabel = nullptr;
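The split above factors the pad-map construction out of computeCallSiteTable so that the Win64 C++ EH emitter added later in this patch can reuse it. As a simplified restatement of what that map holds (assuming the EHStreamer.h declarations match their use in these hunks):

    // One entry per try-range begin label.
    struct PadRange {
      unsigned PadIndex;   // which landing pad in the LandingPads vector
      unsigned RangeIndex; // which BeginLabels/EndLabels entry of that pad
    };
    typedef DenseMap<MCSymbol *, PadRange> RangeMapType;

Both computeCallSiteTable and the new emitCXXFrameHandler3Table walk the function's EH labels and consult this map to recover which try-range each begin label opens.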
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index 94d0585..65973fa 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -80,13 +80,15 @@ protected:
/// `false' otherwise.
bool callToNoUnwindFunction(const MachineInstr *MI);
+ void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ RangeMapType &PadMap);
+
/// Compute the call-site table. The entry for an invoke has a try-range
/// containing the call, a non-zero landing pad and an appropriate action.
/// The entry for an ordinary call has a try-range containing the call and
/// zero for the landing pad and the action. Calls marked 'nounwind' have
/// no entry and must not be contained in the try-range of any entry - they
/// form gaps in the table. Entries must be ordered by try-range address.
-
void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
const SmallVectorImpl<const LandingPadInfo *> &LPs,
const SmallVectorImpl<unsigned> &FirstActions);
@@ -123,7 +125,7 @@ protected:
public:
EHStreamer(AsmPrinter *A);
- virtual ~EHStreamer();
+ ~EHStreamer() override;
// Unused.
void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 7d76ead..f89d364 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
@@ -67,6 +68,27 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
shouldEmitLSDA = shouldEmitPersonality &&
LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ // If this was an outlined handler, we need to define the label corresponding
+ // to the offset of the parent frame relative to the stack pointer after the
+ // prologue.
+ const Function *F = MF->getFunction();
+ const Function *ParentF = MMI->getWinEHParent(F);
+ if (F != ParentF) {
+ WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF);
+ auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F);
+ if (I != FuncInfo.CatchHandlerParentFrameObjOffset.end()) {
+ MCSymbol *HandlerTypeParentFrameOffset =
+ Asm->OutContext.getOrCreateParentFrameOffsetSymbol(
+ GlobalValue::getRealLinkageName(F->getName()));
+
+ // Emit a symbol assignment.
+ Asm->OutStreamer.EmitAssignment(
+ HandlerTypeParentFrameOffset,
+ MCConstantExpr::Create(I->second, Asm->OutContext));
+ }
+ }
+
if (!shouldEmitPersonality && !shouldEmitMoves)
return;
@@ -82,12 +104,17 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
/// endFunction - Gather and emit post-function exception information.
///
-void Win64Exception::endFunction(const MachineFunction *) {
+void Win64Exception::endFunction(const MachineFunction *MF) {
if (!shouldEmitPersonality && !shouldEmitMoves)
return;
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
+ EHPersonality Per = MMI->getPersonalityType();
+
+ // Get rid of any dead landing pads if we're not using a Windows EH scheme. In
+ // Windows EH schemes, the landing pad is not actually reachable. It only
+ // exists so that we can emit the right table data.
+ if (!isMSVCEHPersonality(Per))
+ MMI->TidyLandingPads();
if (shouldEmitPersonality) {
Asm->OutStreamer.PushSection();
@@ -97,9 +124,10 @@ void Win64Exception::endFunction(const MachineFunction *) {
// Emit the tables appropriate to the personality function in use. If we
// don't recognize the personality, assume it uses an Itanium-style LSDA.
- EHPersonality Per = MMI->getPersonalityType();
if (Per == EHPersonality::MSVC_Win64SEH)
emitCSpecificHandlerTable();
+ else if (Per == EHPersonality::MSVC_CXX)
+ emitCXXFrameHandler3Table(MF);
else
emitExceptionTable();
@@ -108,11 +136,19 @@ void Win64Exception::endFunction(const MachineFunction *) {
Asm->OutStreamer.EmitWinCFIEndProc();
}
-const MCSymbolRefExpr *Win64Exception::createImageRel32(const MCSymbol *Value) {
+const MCExpr *Win64Exception::createImageRel32(const MCSymbol *Value) {
+ if (!Value)
+ return MCConstantExpr::Create(0, Asm->OutContext);
return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32,
Asm->OutContext);
}
+const MCExpr *Win64Exception::createImageRel32(const GlobalValue *GV) {
+ if (!GV)
+ return MCConstantExpr::Create(0, Asm->OutContext);
+ return createImageRel32(Asm->getSymbol(GV));
+}
+
/// Emit the language-specific data that __C_specific_handler expects. This
/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning
/// up after faults with __try, __except, and __finally. The typeinfo values
@@ -237,3 +273,231 @@ void Win64Exception::emitCSpecificHandlerTable() {
}
}
}
+
+void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) {
+ const Function *F = MF->getFunction();
+ const Function *ParentF = MMI->getWinEHParent(F);
+ auto &OS = Asm->OutStreamer;
+ WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF);
+
+ StringRef ParentLinkageName =
+ GlobalValue::getRealLinkageName(ParentF->getName());
+
+ MCSymbol *FuncInfoXData =
+ Asm->OutContext.GetOrCreateSymbol(Twine("$cppxdata$", ParentLinkageName));
+ OS.EmitValue(createImageRel32(FuncInfoXData), 4);
+
+ // The Itanium LSDA table sorts similar landing pads together to simplify the
+ // actions table, but we don't need that.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ LandingPads.reserve(PadInfos.size());
+ for (const auto &LP : PadInfos)
+ LandingPads.push_back(&LP);
+
+ RangeMapType PadMap;
+ computePadMap(LandingPads, PadMap);
+
+ // The end label of the previous invoke or nounwind try-range.
+ MCSymbol *LastLabel = Asm->getFunctionBegin();
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ int LastEHState = -2;
+
+ // The parent function and the catch handlers contribute to the 'ip2state'
+ // table.
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (!MI.isEHLabel()) {
+ if (MI.isCall())
+ SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI);
+ continue;
+ }
+
+ // End of the previous try-range?
+ MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol();
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ const PadRange &P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ if (SawPotentiallyThrowing) {
+ FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1));
+ SawPotentiallyThrowing = false;
+ LastEHState = -1;
+ }
+
+ if (LandingPad->WinEHState != LastEHState)
+ FuncInfo.IPToStateList.push_back(
+ std::make_pair(BeginLabel, LandingPad->WinEHState));
+ LastEHState = LandingPad->WinEHState;
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ }
+ }
+
+ // Defer emission until we've visited the parent function and all the catch
+ // handlers. Cleanups don't contribute to the ip2state table yet, so don't
+ // count them.
+ if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F))
+ return;
+ ++FuncInfo.NumIPToStateFuncsVisited;
+ if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size())
+ return;
+
+ MCSymbol *UnwindMapXData = nullptr;
+ MCSymbol *TryBlockMapXData = nullptr;
+ MCSymbol *IPToStateXData = nullptr;
+ if (!FuncInfo.UnwindMap.empty())
+ UnwindMapXData = Asm->OutContext.GetOrCreateSymbol(
+ Twine("$stateUnwindMap$", ParentLinkageName));
+ if (!FuncInfo.TryBlockMap.empty())
+ TryBlockMapXData = Asm->OutContext.GetOrCreateSymbol(
+ Twine("$tryMap$", ParentLinkageName));
+ if (!FuncInfo.IPToStateList.empty())
+ IPToStateXData = Asm->OutContext.GetOrCreateSymbol(
+ Twine("$ip2state$", ParentLinkageName));
+
+ // FuncInfo {
+ // uint32_t MagicNumber
+ // int32_t MaxState;
+ // UnwindMapEntry *UnwindMap;
+ // uint32_t NumTryBlocks;
+ // TryBlockMapEntry *TryBlockMap;
+ // uint32_t IPMapEntries;
+ // IPToStateMapEntry *IPToStateMap;
+ // uint32_t UnwindHelp; // (x64/ARM only)
+ // ESTypeList *ESTypeList;
+ // int32_t EHFlags;
+ // }
+ // EHFlags & 1 -> Synchronous exceptions only, no async exceptions.
+ // EHFlags & 2 -> ???
+ // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue.
+ OS.EmitLabel(FuncInfoXData);
+ OS.EmitIntValue(0x19930522, 4); // MagicNumber
+ OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState
+ OS.EmitValue(createImageRel32(UnwindMapXData), 4); // UnwindMap
+ OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks
+ OS.EmitValue(createImageRel32(TryBlockMapXData), 4); // TryBlockMap
+ OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries
+ OS.EmitValue(createImageRel32(IPToStateXData), 4); // IPToStateMap
+ OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp
+ OS.EmitIntValue(0, 4); // ESTypeList
+ OS.EmitIntValue(1, 4); // EHFlags
+
+ // UnwindMapEntry {
+ // int32_t ToState;
+ // void (*Action)();
+ // };
+ if (UnwindMapXData) {
+ OS.EmitLabel(UnwindMapXData);
+ for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) {
+ OS.EmitIntValue(UME.ToState, 4); // ToState
+ OS.EmitValue(createImageRel32(UME.Cleanup), 4); // Action
+ }
+ }
+
+ // TryBlockMap {
+ // int32_t TryLow;
+ // int32_t TryHigh;
+ // int32_t CatchHigh;
+ // int32_t NumCatches;
+ // HandlerType *HandlerArray;
+ // };
+ if (TryBlockMapXData) {
+ OS.EmitLabel(TryBlockMapXData);
+ SmallVector<MCSymbol *, 1> HandlerMaps;
+ for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
+ WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
+ MCSymbol *HandlerMapXData = nullptr;
+
+ if (!TBME.HandlerArray.empty())
+ HandlerMapXData =
+ Asm->OutContext.GetOrCreateSymbol(Twine("$handlerMap$")
+ .concat(Twine(I))
+ .concat("$")
+ .concat(ParentLinkageName));
+
+ HandlerMaps.push_back(HandlerMapXData);
+
+ int CatchHigh = -1;
+ for (WinEHHandlerType &HT : TBME.HandlerArray)
+ CatchHigh =
+ std::max(CatchHigh, FuncInfo.CatchHandlerMaxState[HT.Handler]);
+
+ assert(TBME.TryLow <= TBME.TryHigh);
+ assert(CatchHigh > TBME.TryHigh);
+ OS.EmitIntValue(TBME.TryLow, 4); // TryLow
+ OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh
+ OS.EmitIntValue(CatchHigh, 4); // CatchHigh
+ OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches
+ OS.EmitValue(createImageRel32(HandlerMapXData), 4); // HandlerArray
+ }
+
+ for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
+ WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
+ MCSymbol *HandlerMapXData = HandlerMaps[I];
+ if (!HandlerMapXData)
+ continue;
+ // HandlerType {
+ // int32_t Adjectives;
+ // TypeDescriptor *Type;
+ // int32_t CatchObjOffset;
+ // void (*Handler)();
+ // int32_t ParentFrameOffset; // x64 only
+ // };
+ OS.EmitLabel(HandlerMapXData);
+ for (const WinEHHandlerType &HT : TBME.HandlerArray) {
+ MCSymbol *ParentFrameOffset =
+ Asm->OutContext.getOrCreateParentFrameOffsetSymbol(
+ GlobalValue::getRealLinkageName(HT.Handler->getName()));
+ const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::Create(
+ ParentFrameOffset, MCSymbolRefExpr::VK_None, Asm->OutContext);
+
+ // Get the frame escape label with the offset of the catch object. If
+ // the index is -1, then there is no catch object, and we should emit an
+ // offset of zero, indicating that no copy will occur.
+ const MCExpr *FrameAllocOffsetRef = nullptr;
+ if (HT.CatchObjRecoverIdx >= 0) {
+ MCSymbol *FrameAllocOffset =
+ Asm->OutContext.getOrCreateFrameAllocSymbol(
+ GlobalValue::getRealLinkageName(ParentF->getName()),
+ HT.CatchObjRecoverIdx);
+ FrameAllocOffsetRef = MCSymbolRefExpr::Create(
+ FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext);
+ } else {
+ FrameAllocOffsetRef = MCConstantExpr::Create(0, Asm->OutContext);
+ }
+
+ OS.EmitIntValue(HT.Adjectives, 4); // Adjectives
+ OS.EmitValue(createImageRel32(HT.TypeDescriptor), 4); // Type
+ OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset
+ OS.EmitValue(createImageRel32(HT.Handler), 4); // Handler
+ OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset
+ }
+ }
+ }
+
+ // IPToStateMapEntry {
+ // void *IP;
+ // int32_t State;
+ // };
+ if (IPToStateXData) {
+ OS.EmitLabel(IPToStateXData);
+ for (auto &IPStatePair : FuncInfo.IPToStateList) {
+ OS.EmitValue(createImageRel32(IPStatePair.first), 4); // IP
+ OS.EmitIntValue(IPStatePair.second, 4); // State
+ }
+ }
+}
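For orientation, the IPToStateList entries emitted above are (label, state) pairs ordered by address; on the usual reading of the MSVC scheme, the state for an instruction pointer is taken from the last entry at or below it, and -1 means "not inside any try". A hypothetical, simplified lookup, not part of this patch and using stand-in types:

    #include <cstdint>
    #include <utility>
    #include <vector>

    int lookupEHState(const std::vector<std::pair<uint64_t, int>> &IPToState,
                      uint64_t IP) {
      int State = -1; // -1: outside any try region
      for (const auto &Entry : IPToState) {
        if (Entry.first > IP)
          break;              // entries are ordered by address
        State = Entry.second;
      }
      return State;
    }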
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/Win64Exception.h
index b2d5d1b..5f4237f 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.h
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.h
@@ -17,7 +17,9 @@
#include "EHStreamer.h"
namespace llvm {
+class GlobalValue;
class MachineFunction;
+class MCExpr;
class Win64Exception : public EHStreamer {
/// Per-function flag to indicate if personality info should be emitted.
@@ -31,14 +33,17 @@ class Win64Exception : public EHStreamer {
void emitCSpecificHandlerTable();
- const MCSymbolRefExpr *createImageRel32(const MCSymbol *Value);
+ void emitCXXFrameHandler3Table(const MachineFunction *MF);
+
+ const MCExpr *createImageRel32(const MCSymbol *Value);
+ const MCExpr *createImageRel32(const GlobalValue *GV);
public:
//===--------------------------------------------------------------------===//
// Main entry points.
//
Win64Exception(AsmPrinter *A);
- virtual ~Win64Exception();
+ ~Win64Exception() override;
/// Emit all exception information that should come after the content.
void endModule() override;
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index d2b4eec..276e7df 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -20,14 +20,13 @@ namespace llvm {
StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
assert(S);
- DIDescriptor D(S);
- assert((D.isCompileUnit() || D.isFile() || D.isSubprogram() ||
- D.isLexicalBlockFile() || D.isLexicalBlock()) &&
+ assert((isa<MDCompileUnit>(S) || isa<MDFile>(S) || isa<MDSubprogram>(S) ||
+ isa<MDLexicalBlockBase>(S)) &&
"Unexpected scope info");
- DIScope Scope(S);
- StringRef Dir = Scope.getDirectory(),
- Filename = Scope.getFilename();
+ auto *Scope = cast<MDScope>(S);
+ StringRef Dir = Scope->getDirectory(),
+ Filename = Scope->getFilename();
char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
if (Result)
return Result;
@@ -40,7 +39,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
if (Filename.find(':') == 1)
Filepath = Filename;
else
- Filepath = (Dir + Twine("\\") + Filename).str();
+ Filepath = (Dir + "\\" + Filename).str();
// Canonicalize the path. We have to do it textually because we may no longer
// have access to the file in the filesystem.
@@ -81,7 +80,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
const MachineFunction *MF) {
- const MDNode *Scope = DL.getScope(MF->getFunction()->getContext());
+ const MDNode *Scope = DL.getScope();
if (!Scope)
return;
StringRef Filename = getFullFilepath(Scope);
@@ -193,7 +192,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
StringRef GVName = GV->getName();
StringRef FuncName;
if (DISubprogram SP = getDISubprogram(GV))
- FuncName = SP.getDisplayName();
+ FuncName = SP->getDisplayName();
// FIXME Clang currently sets DisplayName to "bar" for a C++
// "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying
@@ -330,7 +329,7 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
DebugLoc PrologEndLoc;
bool EmptyPrologue = true;
for (const auto &MBB : *MF) {
- if (!PrologEndLoc.isUnknown())
+ if (PrologEndLoc)
break;
for (const auto &MI : MBB) {
if (MI.isDebugValue())
@@ -339,8 +338,7 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
// First known non-DBG_VALUE and non-frame setup location marks
// the beginning of the function body.
// FIXME: do we need the first subcondition?
- if (!MI.getFlag(MachineInstr::FrameSetup) &&
- (!MI.getDebugLoc().isUnknown())) {
+ if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
PrologEndLoc = MI.getDebugLoc();
break;
}
@@ -348,9 +346,8 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
}
}
// Record beginning of function if we have a non-empty prologue.
- if (!PrologEndLoc.isUnknown() && !EmptyPrologue) {
- DebugLoc FnStartDL =
- PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext());
+ if (PrologEndLoc && !EmptyPrologue) {
+ DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
maybeRecordLocation(FnStartDL, MF);
}
}
@@ -377,7 +374,7 @@ void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) {
if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
return;
DebugLoc DL = MI->getDebugLoc();
- if (DL == PrevInstLoc || DL.isUnknown())
+ if (DL == PrevInstLoc || !DL)
return;
maybeRecordLocation(DL, Asm->MF);
}
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
index 8492eac..c66d141 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
@@ -114,7 +114,7 @@ class WinCodeViewLineTables : public AsmPrinterHandler {
public:
WinCodeViewLineTables(AsmPrinter *Asm);
- ~WinCodeViewLineTables() {
+ ~WinCodeViewLineTables() override {
for (DirAndFilenameToFilepathMapTy::iterator
I = DirAndFilenameToFilepathMap.begin(),
E = DirAndFilenameToFilepathMap.end();
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 6c9d048..35376e1 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -693,11 +693,11 @@ static bool SinkCast(CastInst *CI) {
InsertedCast =
CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
InsertPt);
- MadeChange = true;
}
// Replace a use of the cast with a use of the new cast.
TheUse = InsertedCast;
+ MadeChange = true;
++NumCastUses;
}
@@ -747,13 +747,60 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
return SinkCast(CI);
}
-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// CombineUAddWithOverflow - try to combine CI into a call to the
+/// llvm.uadd.with.overflow intrinsic if possible.
+///
+/// Return true if any changes were made.
+static bool CombineUAddWithOverflow(CmpInst *CI) {
+ Value *A, *B;
+ Instruction *AddI;
+ if (!match(CI,
+ m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+ return false;
+
+ Type *Ty = AddI->getType();
+ if (!isa<IntegerType>(Ty))
+ return false;
+
+ // We don't want to move around uses of condition values this late, so we
+ // check if it is legal to create the call to the intrinsic in the basic
+ // block containing the icmp:
+
+ if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+ return false;
+
+#ifndef NDEBUG
+ // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
+ // for now:
+ if (AddI->hasOneUse())
+ assert(*AddI->user_begin() == CI && "expected!");
+#endif
+
+ Module *M = CI->getParent()->getParent()->getParent();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+
+ auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+
+ auto *UAddWithOverflow =
+ CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+ auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+ auto *Overflow =
+ ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+
+ CI->replaceAllUsesWith(Overflow);
+ AddI->replaceAllUsesWith(UAdd);
+ CI->eraseFromParent();
+ AddI->eraseFromParent();
+ return true;
+}
+
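For context, the pattern CombineUAddWithOverflow rewrites is the canonical unsigned overflow check, where the compare tests the sum against one of the addends. A hedged source-level illustration (hypothetical function, not from this patch), assuming m_UAddWithOverflow recognizes the (A + B) < A form:

    unsigned add_with_check(unsigned A, unsigned B, bool &Overflow) {
      unsigned Sum = A + B; // AddI: becomes extractvalue 0 of llvm.uadd.with.overflow
      Overflow = Sum < A;   // CI: the icmp replaced by extractvalue 1, the overflow bit
      return Sum;
    }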
+/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI) {
BasicBlock *DefBB = CI->getParent();
/// InsertedCmp - Only insert a cmp in each block once.
@@ -787,21 +834,33 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
CmpInst::Create(CI->getOpcode(),
CI->getPredicate(), CI->getOperand(0),
CI->getOperand(1), "", InsertPt);
- MadeChange = true;
}
// Replace a use of the cmp with a use of the new cmp.
TheUse = InsertedCmp;
+ MadeChange = true;
++NumCmpUses;
}
// If we removed all uses, nuke the cmp.
- if (CI->use_empty())
+ if (CI->use_empty()) {
CI->eraseFromParent();
+ MadeChange = true;
+ }
return MadeChange;
}
+static bool OptimizeCmpExpression(CmpInst *CI) {
+ if (SinkCmpExpression(CI))
+ return true;
+
+ if (CombineUAddWithOverflow(CI))
+ return true;
+
+ return false;
+}
+
/// isExtractBitsCandidateUse - Check if the candidates could
/// be combined with shift instruction, which includes:
/// 1. Truncate instruction
@@ -1081,8 +1140,9 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
//
CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Builder.SetInsertPoint(InsertPt);
-
- Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
LoadInst* Load = Builder.CreateLoad(Gep, false);
VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
@@ -1176,7 +1236,8 @@ static void ScalarizeMaskedStore(CallInst *CI) {
Builder.SetInsertPoint(InsertPt);
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
Builder.CreateStore(OneElt, Gep);
// Create "else" block, fill it in the next iteration
@@ -1227,13 +1288,25 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
uint64_t Offset2 = Offset.getLimitedValue();
+ if ((Offset2 & (PrefAlign-1)) != 0)
+ continue;
AllocaInst *AI;
- if ((Offset2 & (PrefAlign-1)) == 0 &&
- (AI = dyn_cast<AllocaInst>(Val)) &&
+ if ((AI = dyn_cast<AllocaInst>(Val)) &&
AI->getAlignment() < PrefAlign &&
TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
AI->setAlignment(PrefAlign);
- // TODO: Also align GlobalVariables
+ // Global variables can only be aligned if they are defined in this
+ // object (i.e. they are uniquely initialized in this object), and
+ // over-aligning global variables that have an explicit section is
+ // forbidden.
+ GlobalVariable *GV;
+ if ((GV = dyn_cast<GlobalVariable>(Val)) &&
+ GV->hasUniqueInitializer() &&
+ !GV->hasSection() &&
+ GV->getAlignment() < PrefAlign &&
+ TD->getTypeAllocSize(
+ GV->getType()->getElementType()) >= MinSize + Offset2)
+ GV->setAlignment(PrefAlign);
}
// If this is a memcpy (or similar) then we may be able to improve the
// alignment
@@ -1841,7 +1914,7 @@ class TypePromotionTransaction {
Inst->removeFromParent();
}
- ~InstructionRemover() { delete Replacer; }
+ ~InstructionRemover() override { delete Replacer; }
/// \brief Really remove the instruction.
void commit() override { delete Inst; }
@@ -3233,7 +3306,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
return false;
} else {
Type *I8PtrTy =
- Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Type *I8Ty = Builder.getInt8Ty();
// Start with the base register. Do this first so that subsequent address
// matching finds it last, which will prevent it from trying to match it
@@ -3285,7 +3359,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
- ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+ ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
ResultIndex = V;
@@ -3296,7 +3370,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
- SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+ SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
if (SunkAddr->getType() != Addr->getType())
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index ceef74d..af011a0 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -69,7 +69,7 @@ class TargetRegisterInfo;
public:
CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
- ~CriticalAntiDepBreaker();
+ ~CriticalAntiDepBreaker() override;
/// Initialize anti-dep breaking for a new basic block.
void StartBlock(MachineBasicBlock *BB) override;
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 8f74271..6cde4c2 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -797,9 +797,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
// if-conversion in a single pass. The tryConvertIf() function may erase
// blocks, but only blocks dominated by the head block. This makes it safe to
// update the dominator tree while the post-order iterator is still active.
- for (po_iterator<MachineDominatorTree*>
- I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I)
- if (tryConvertIf(I->getBlock()))
+ for (auto DomNode : post_order(DomTree))
+ if (tryConvertIf(DomNode->getBlock()))
Changed = true;
return Changed;
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index a2c5fce..16cd9e8 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -99,10 +99,6 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
static const char *DescKind(GC::PointKind Kind) {
switch (Kind) {
- case GC::Loop:
- return "loop";
- case GC::Return:
- return "return";
case GC::PreCall:
return "pre-call";
case GC::PostCall:
diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp
index 9d38e4c..ac35165 100644
--- a/lib/CodeGen/GCRootLowering.cpp
+++ b/lib/CodeGen/GCRootLowering.cpp
@@ -332,19 +332,22 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
return false;
FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
- if (!FI->getStrategy().needsSafePoints())
- return false;
-
MMI = &getAnalysis<MachineModuleInfo>();
TII = MF.getSubtarget().getInstrInfo();
- // Find the size of the stack frame.
- FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+  // Find the size of the stack frame. There may be no correct static frame
+  // size; we use UINT64_MAX to represent this.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ const bool DynamicFrameSize = MFI->hasVarSizedObjects() ||
+ RegInfo->needsStackRealignment(MF);
+ FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI->getStackSize());
// Find all safe points.
- FindSafePoints(MF);
+ if (FI->getStrategy().needsSafePoints())
+ FindSafePoints(MF);
- // Find the stack offsets for all roots.
+  // Find the concrete stack offsets for all roots (stack slots).
FindStackOffsets(MF);
return false;
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 4188e5d..153ba1a 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -73,9 +73,10 @@ using namespace llvm;
#define DEBUG_TYPE "global-merge"
+// FIXME: This is only useful as a last-resort way to disable the pass.
cl::opt<bool>
EnableGlobalMerge("enable-global-merge", cl::Hidden,
- cl::desc("Enable global merge pass"),
+ cl::desc("Enable the global merge pass"),
cl::init(true));
static cl::opt<bool>
@@ -222,7 +223,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
ConstantInt::get(Int32Ty, 0),
ConstantInt::get(Int32Ty, k-i)
};
- Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
+ Constant *GEP =
+ ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
Globals[k]->replaceAllUsesWith(GEP);
Globals[k]->eraseFromParent();
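
ConstantExpr::getInBoundsGetElementPtr likewise now takes the source element type explicitly, as the MergedTy argument above shows. A minimal sketch under assumed names; the "merged" global below is a stand-in for illustration, not the pass's actual output.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("gep-const-sketch", Ctx);

  // A merged array of four i32s, standing in for MergedGV in the pass.
  Type *Int32Ty = Type::getInt32Ty(Ctx);
  ArrayType *MergedTy = ArrayType::get(Int32Ty, 4);
  auto *MergedGV = new GlobalVariable(M, MergedTy, /*isConstant=*/false,
                                      GlobalValue::InternalLinkage,
                                      Constant::getNullValue(MergedTy),
                                      "merged");

  Constant *Idx[2] = {ConstantInt::get(Int32Ty, 0),
                      ConstantInt::get(Int32Ty, 2)};
  // New form: the pointee's element type is spelled out explicitly.
  Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
  (void)GEP;
  return 0;
}
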
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index f0d407f..c7e7e58 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -135,7 +135,7 @@ private:
// Dead defs generated during spilling.
SmallVector<MachineInstr*, 8> DeadDefs;
- ~InlineSpiller() {}
+ ~InlineSpiller() override {}
public:
InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
@@ -1232,6 +1232,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
DebugLoc DL = MI->getDebugLoc();
DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
MachineBasicBlock *MBB = MI->getParent();
+ assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE))
.addFrameIndex(StackSlot)
.addImm(Offset)
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 0fb0c46..61d68f6 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -140,12 +140,9 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
return &MMI->getContext();
}
-bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- bool DisableVerify,
- AnalysisID StartAfter,
- AnalysisID StopAfter) {
+bool LLVMTargetMachine::addPassesToEmitFile(
+ PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
+ bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) {
// Add common CodeGen passes.
MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
StartAfter, StopAfter);
@@ -175,7 +172,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
switch (FileType) {
case CGFT_AssemblyFile: {
MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(
- MAI.getAssemblerDialect(), MAI, MII, MRI, STI);
+ Triple(getTargetTriple()), MAI.getAssemblerDialect(), MAI, MII, MRI);
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = nullptr;
@@ -184,8 +181,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
+ auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
- *Context, Out, Options.MCOptions.AsmVerbose,
+ *Context, std::move(FOut), Options.MCOptions.AsmVerbose,
Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB,
Options.MCOptions.ShowMCInst);
AsmStreamer.reset(S);
@@ -229,9 +227,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
/// code is not supported. It fills the MCContext Ctx pointer which can be
/// used to build custom MCStreamer.
///
-bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
- MCContext *&Ctx,
- raw_ostream &Out,
+bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
+ raw_pwrite_stream &Out,
bool DisableVerify) {
// Add common CodeGen passes.
Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr);
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 9eaf7da..d6998d6 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -59,11 +59,11 @@ void LexicalScopes::extractLexicalScopes(
for (const auto &MBB : *MF) {
const MachineInstr *RangeBeginMI = nullptr;
const MachineInstr *PrevMI = nullptr;
- DebugLoc PrevDL;
+ const MDLocation *PrevDL = nullptr;
for (const auto &MInsn : MBB) {
// Check if instruction has valid location information.
- const DebugLoc MIDL = MInsn.getDebugLoc();
- if (MIDL.isUnknown()) {
+ const MDLocation *MIDL = MInsn.getDebugLoc();
+ if (!MIDL) {
PrevMI = &MInsn;
continue;
}
@@ -96,7 +96,7 @@ void LexicalScopes::extractLexicalScopes(
}
// Create last instruction range.
- if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
+ if (RangeBeginMI && PrevMI && PrevDL) {
InsnRange R(RangeBeginMI, PrevMI);
MIRanges.push_back(R);
MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
@@ -106,20 +106,17 @@ void LexicalScopes::extractLexicalScopes(
/// findLexicalScope - Find lexical scope, either regular or inlined, for the
/// given DebugLoc. Return NULL if not found.
-LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
- MDNode *Scope = nullptr;
- MDNode *IA = nullptr;
- DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext());
+LexicalScope *LexicalScopes::findLexicalScope(const MDLocation *DL) {
+ MDLocalScope *Scope = DL->getScope();
if (!Scope)
return nullptr;
// The scope that we were created with could have an extra file - which
// isn't what we care about in this case.
- DIDescriptor D = DIDescriptor(Scope);
- if (D.isLexicalBlockFile())
- Scope = DILexicalBlockFile(Scope).getScope();
+ if (auto *File = dyn_cast<MDLexicalBlockFile>(Scope))
+ Scope = File->getScope();
- if (IA) {
+ if (auto *IA = DL->getInlinedAt()) {
auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA));
return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr;
}
@@ -128,46 +125,39 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
/// not available then create new lexical scope.
-LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) {
- if (DL.isUnknown())
- return nullptr;
- MDNode *Scope = nullptr;
- MDNode *InlinedAt = nullptr;
- DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext());
-
- if (InlinedAt) {
+LexicalScope *LexicalScopes::getOrCreateLexicalScope(const MDLocalScope *Scope,
+ const MDLocation *IA) {
+ if (IA) {
// Create an abstract scope for inlined function.
getOrCreateAbstractScope(Scope);
// Create an inlined scope for inlined function.
- return getOrCreateInlinedScope(Scope, InlinedAt);
+ return getOrCreateInlinedScope(Scope, IA);
}
return getOrCreateRegularScope(Scope);
}
/// getOrCreateRegularScope - Find or create a regular lexical scope.
-LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
- DIDescriptor D = DIDescriptor(Scope);
- if (D.isLexicalBlockFile()) {
- Scope = DILexicalBlockFile(Scope).getScope();
- D = DIDescriptor(Scope);
- }
+LexicalScope *
+LexicalScopes::getOrCreateRegularScope(const MDLocalScope *Scope) {
+ if (auto *File = dyn_cast<MDLexicalBlockFile>(Scope))
+ Scope = File->getScope();
auto I = LexicalScopeMap.find(Scope);
if (I != LexicalScopeMap.end())
return &I->second;
+ // FIXME: Should the following dyn_cast be MDLexicalBlock?
LexicalScope *Parent = nullptr;
- if (D.isLexicalBlock())
- Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope));
+ if (auto *Block = dyn_cast<MDLexicalBlockBase>(Scope))
+ Parent = getOrCreateLexicalScope(Block->getScope());
I = LexicalScopeMap.emplace(std::piecewise_construct,
std::forward_as_tuple(Scope),
- std::forward_as_tuple(Parent, DIDescriptor(Scope),
- nullptr, false)).first;
+ std::forward_as_tuple(Parent, Scope, nullptr,
+ false)).first;
if (!Parent) {
- assert(DIDescriptor(Scope).isSubprogram());
- assert(DISubprogram(Scope).describes(MF->getFunction()));
+ assert(cast<MDSubprogram>(Scope)->describes(MF->getFunction()));
assert(!CurrentFnLexicalScope);
CurrentFnLexicalScope = &I->second;
}
@@ -176,19 +166,19 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
}
/// getOrCreateInlinedScope - Find or create an inlined lexical scope.
-LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode,
- MDNode *InlinedAt) {
- std::pair<const MDNode*, const MDNode*> P(ScopeNode, InlinedAt);
+LexicalScope *
+LexicalScopes::getOrCreateInlinedScope(const MDLocalScope *Scope,
+ const MDLocation *InlinedAt) {
+ std::pair<const MDLocalScope *, const MDLocation *> P(Scope, InlinedAt);
auto I = InlinedLexicalScopeMap.find(P);
if (I != InlinedLexicalScopeMap.end())
return &I->second;
LexicalScope *Parent;
- DILexicalBlock Scope(ScopeNode);
- if (Scope.isSubprogram())
- Parent = getOrCreateLexicalScope(DebugLoc::getFromDILocation(InlinedAt));
+ if (auto *Block = dyn_cast<MDLexicalBlockBase>(Scope))
+ Parent = getOrCreateInlinedScope(Block->getScope(), InlinedAt);
else
- Parent = getOrCreateInlinedScope(Scope.getContext(), InlinedAt);
+ Parent = getOrCreateLexicalScope(InlinedAt);
I = InlinedLexicalScopeMap.emplace(std::piecewise_construct,
std::forward_as_tuple(P),
@@ -199,27 +189,26 @@ LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode,
}
/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
-LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) {
- assert(N && "Invalid Scope encoding!");
+LexicalScope *
+LexicalScopes::getOrCreateAbstractScope(const MDLocalScope *Scope) {
+ assert(Scope && "Invalid Scope encoding!");
- DIDescriptor Scope(N);
- if (Scope.isLexicalBlockFile())
- Scope = DILexicalBlockFile(Scope).getScope();
+ if (auto *File = dyn_cast<MDLexicalBlockFile>(Scope))
+ Scope = File->getScope();
auto I = AbstractScopeMap.find(Scope);
if (I != AbstractScopeMap.end())
return &I->second;
+ // FIXME: Should the following isa be MDLexicalBlock?
LexicalScope *Parent = nullptr;
- if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(Scope);
- DIDescriptor ParentDesc = DB.getContext();
- Parent = getOrCreateAbstractScope(ParentDesc);
- }
+ if (auto *Block = dyn_cast<MDLexicalBlockBase>(Scope))
+ Parent = getOrCreateAbstractScope(Block->getScope());
+
I = AbstractScopeMap.emplace(std::piecewise_construct,
std::forward_as_tuple(Scope),
std::forward_as_tuple(Parent, Scope,
nullptr, true)).first;
- if (Scope.isSubprogram())
+ if (isa<MDSubprogram>(Scope))
AbstractScopesList.push_back(&I->second);
return &I->second;
}
@@ -280,7 +269,7 @@ void LexicalScopes::assignInstructionRanges(
/// have machine instructions that belong to lexical scope identified by
/// DebugLoc.
void LexicalScopes::getMachineBasicBlocks(
- DebugLoc DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) {
+ const MDLocation *DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) {
MBBs.clear();
LexicalScope *Scope = getOrCreateLexicalScope(DL);
if (!Scope)
@@ -303,7 +292,7 @@ void LexicalScopes::getMachineBasicBlocks(
/// dominates - Return true if DebugLoc's lexical scope dominates at least one
/// machine instruction's lexical scope in a given machine basic block.
-bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
+bool LexicalScopes::dominates(const MDLocation *DL, MachineBasicBlock *MBB) {
LexicalScope *Scope = getOrCreateLexicalScope(DL);
if (!Scope)
return false;
@@ -315,12 +304,10 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
bool Result = false;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
++I) {
- DebugLoc IDL = I->getDebugLoc();
- if (IDL.isUnknown())
- continue;
- if (LexicalScope *IScope = getOrCreateLexicalScope(IDL))
- if (Scope->dominates(IScope))
- return true;
+ if (const MDLocation *IDL = I->getDebugLoc())
+ if (LexicalScope *IScope = getOrCreateLexicalScope(IDL))
+ if (Scope->dominates(IScope))
+ return true;
}
return Result;
}
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index e3791be..c2993db 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -158,10 +158,10 @@ public:
UserValue *getNext() const { return next; }
/// match - Does this UserValue match the parameters?
- bool match(const MDNode *Var, const MDNode *Expr, unsigned Offset,
- bool indirect) const {
- return Var == Variable && Expr == Expression && Offset == offset &&
- indirect == IsIndirect;
+ bool match(const MDNode *Var, const MDNode *Expr, const MDLocation *IA,
+ unsigned Offset, bool indirect) const {
+ return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA &&
+ Offset == offset && indirect == IsIndirect;
}
/// merge - Merge equivalence classes.
@@ -269,12 +269,6 @@ public:
void emitDebugValues(VirtRegMap *VRM,
LiveIntervals &LIS, const TargetInstrInfo &TRI);
- /// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. A
- /// variable may have more than one corresponding DBG_VALUE instructions.
- /// Only first one needs DebugLoc to identify variable's lexical scope
- /// in source file.
- DebugLoc findDebugLoc();
-
/// getDebugLoc - Return DebugLoc of this UserValue.
DebugLoc getDebugLoc() { return dl;}
void print(raw_ostream &, const TargetRegisterInfo *);
@@ -363,10 +357,47 @@ public:
};
} // namespace
+static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
+ const LLVMContext &Ctx) {
+ if (!DL)
+ return;
+
+ auto *Scope = cast<MDScope>(DL.getScope());
+ // Omit the directory, because it's likely to be long and uninteresting.
+ CommentOS << Scope->getFilename();
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+
+ DebugLoc InlinedAtDL = DL.getInlinedAt();
+ if (!InlinedAtDL)
+ return;
+
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, CommentOS, Ctx);
+ CommentOS << " ]";
+}
+
+static void printExtendedName(raw_ostream &OS, const MDLocalVariable *V,
+ const MDLocation *DL) {
+ const LLVMContext &Ctx = V->getContext();
+ StringRef Res = V->getName();
+ if (!Res.empty())
+ OS << Res << "," << V->getLine();
+ if (auto *InlinedAt = DL->getInlinedAt()) {
+ if (DebugLoc InlinedAtDL = InlinedAt) {
+ OS << " @[";
+ printDebugLoc(InlinedAtDL, OS, Ctx);
+ OS << "]";
+ }
+ }
+}
+
void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
- DIVariable DV(Variable);
+ DIVariable DV = cast<MDLocalVariable>(Variable);
OS << "!\"";
- DV.printExtendedName(OS);
+ printExtendedName(OS, DV, dl);
+
OS << "\"\t";
if (offset)
OS << '+' << offset;
@@ -433,7 +464,7 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr,
UserValue *UV = Leader->getLeader();
Leader = UV;
for (; UV; UV = UV->getNext())
- if (UV->match(Var, Expr, Offset, IsIndirect))
+ if (UV->match(Var, Expr, DL->getInlinedAt(), Offset, IsIndirect))
return UV;
}
@@ -942,11 +973,6 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
std::next(MachineBasicBlock::iterator(MI));
}
-DebugLoc UserValue::findDebugLoc() {
- DebugLoc D = dl;
- dl = DebugLoc();
- return D;
-}
void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
unsigned LocNo,
LiveIntervals &LIS,
@@ -955,11 +981,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
MachineOperand &Loc = locations[LocNo];
++NumInsertedDebugValues;
+ assert(cast<MDLocalVariable>(Variable)
+ ->isValidLocationForIntrinsic(getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
if (Loc.isReg())
- BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
IsIndirect, Loc.getReg(), offset, Variable, Expression);
else
- BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
.addOperand(Loc)
.addImm(offset)
.addMetadata(Variable)
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index 9748329..fe296bc 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -38,7 +38,7 @@ public:
static char ID; // Pass identification, replacement for typeid
LiveDebugVariables();
- ~LiveDebugVariables();
+ ~LiveDebugVariables() override;
/// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
/// @param OldReg Old virtual register that is going away.
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 2afd7fa..d75e441 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -816,23 +816,45 @@ static VNInfo *searchForVNI(const SlotIndexes &Indexes, LiveRange &LR,
static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
SmallPtrSet<const MachineBasicBlock*, 5> Visited;
- for (LiveRange::Segment &S : LI.segments) {
- if (S.valno != nullptr)
- continue;
- // This can only happen at the begin of a basic block.
- assert(S.start.isBlock() && "valno should only be missing at block begin");
-
- Visited.clear();
- const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
- for (const MachineBasicBlock *Pred : MBB->predecessors()) {
- VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
- if (VNI != nullptr) {
- S.valno = VNI;
- break;
+
+ LiveRange::iterator OutIt;
+ VNInfo *PrevValNo = nullptr;
+ for (LiveRange::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ LiveRange::Segment &S = *I;
+ // Determine final VNI if necessary.
+ if (S.valno == nullptr) {
+      // This can only happen at the beginning of a basic block.
+ assert(S.start.isBlock() && "valno should only be missing at block begin");
+
+ Visited.clear();
+ const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
+ if (VNI != nullptr) {
+ S.valno = VNI;
+ break;
+ }
}
+ assert(S.valno != nullptr && "could not determine valno");
+ }
+ // Merge with previous segment if it has the same VNI.
+ if (PrevValNo == S.valno && OutIt->end == S.start) {
+ OutIt->end = S.end;
+ } else {
+ // Didn't merge. Move OutIt to next segment.
+ if (PrevValNo == nullptr)
+ OutIt = LI.begin();
+ else
+ ++OutIt;
+
+ if (OutIt != I)
+ *OutIt = *I;
+ PrevValNo = S.valno;
}
- assert(S.valno != nullptr && "could not determine valno");
}
+  // If we merged some segments, chop off the end.
+ ++OutIt;
+ LI.segments.erase(OutIt, LI.end());
}
void LiveInterval::constructMainRangeFromSubranges(
@@ -955,6 +977,12 @@ void LiveInterval::constructMainRangeFromSubranges(
NeedVNIFixup = true;
}
+ // In rare cases we can produce adjacent segments with the same value
+ // number (if they come from different subranges, but happen to have
+ // the same defining instruction). VNIFixup will fix those cases.
+ if (!empty() && segments.back().end == Pos &&
+ segments.back().valno == VNI)
+ NeedVNIFixup = true;
CurrentSegment.start = Pos;
CurrentSegment.valno = VNI;
ConstructingSegment = true;
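
The determineMissingVNIs rewrite in this file both fills in missing value numbers and compacts adjacent segments that end up with the same value number, using an in-place output-cursor pattern. A generic, runnable sketch of that compaction step on plain data (no LLVM types; purely illustrative):

#include <cstddef>
#include <iostream>
#include <vector>

struct Segment {
  int start, end; // half-open [start, end)
  int valno;      // stand-in for the segment's value number
};

// Merge adjacent, touching segments that carry the same value number,
// compacting the vector in place and chopping off the leftover tail.
static void mergeAdjacent(std::vector<Segment> &Segs) {
  if (Segs.empty())
    return;
  std::size_t Out = 0;
  for (std::size_t I = 1; I < Segs.size(); ++I) {
    Segment &S = Segs[I];
    if (Segs[Out].valno == S.valno && Segs[Out].end == S.start) {
      Segs[Out].end = S.end; // same value number: extend the previous segment
    } else {
      ++Out;                 // didn't merge: advance the write cursor
      Segs[Out] = S;
    }
  }
  Segs.erase(Segs.begin() + Out + 1, Segs.end());
}

int main() {
  std::vector<Segment> Segs = {{0, 4, 1}, {4, 8, 1}, {8, 12, 2}};
  mergeAdjacent(Segs);
  for (const Segment &S : Segs)
    std::cout << '[' << S.start << ',' << S.end << ") v" << S.valno << '\n';
  // Prints: [0,8) v1  then  [8,12) v2
  return 0;
}
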
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 98359b1..34131bb 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -250,7 +250,7 @@ std::string MachineBasicBlock::getFullName() const {
if (getBasicBlock())
Name += getBasicBlock()->getName();
else
- Name += (Twine("BB") + Twine(getNumber())).str();
+ Name += ("BB" + Twine(getNumber())).str();
return Name;
}
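
The getFullName change above drops the redundant Twine wrapper around the string literal, since a literal concatenated with a Twine already yields a Twine that str() materializes once at the end. A tiny runnable sketch of the idiom (standalone, not part of the patch):

#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

using namespace llvm;

int main() {
  int Number = 7;
  // "BB" stays a plain string literal; operator+ builds the Twine lazily and
  // .str() produces the final std::string in one pass.
  std::string Name = ("BB" + Twine(Number)).str();
  outs() << Name << "\n"; // BB7
  return 0;
}
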
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index ecc50c9..2969bad 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -661,7 +661,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
for (MachineBasicBlock *MBB : L.getBlocks()) {
BlockChain &Chain = *BlockToChain[MBB];
// Ensure that this block is at the end of a chain; otherwise it could be
- // mid-way through an inner loop or a successor of an analyzable branch.
+ // mid-way through an inner loop or a successor of an unanalyzable branch.
if (MBB != *std::prev(Chain.end()))
continue;
@@ -715,7 +715,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
// a frequency higher than the current exit before we consider breaking
// the layout.
BranchProbability Bias(100 - ExitBlockBias, 100);
- if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth ||
+ if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
ExitEdgeFreq > BestExitEdgeFreq ||
(MBB->isLayoutSuccessor(Succ) &&
!(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
@@ -724,8 +724,8 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
}
}
- // Restore the old exiting state, no viable looping successor was found.
if (!HasLoopingSucc) {
+ // Restore the old exiting state, no viable looping successor was found.
ExitingBB = OldExitingBB;
BestExitEdgeFreq = OldBestExitEdgeFreq;
continue;
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 6ceace8..448531f 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -380,7 +380,7 @@ namespace llvm {
DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const MachineFunction *F) {
- return "CFG for '" + F->getName().str() + "' function";
+ return ("CFG for '" + F->getName() + "' function").str();
}
std::string getNodeLabel(const MachineBasicBlock *Node,
@@ -468,7 +468,7 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
SmallString<60> Name;
raw_svector_ostream(Name)
<< Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
- return Ctx.GetOrCreateSymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name);
}
/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 1240efb..d154110 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -881,8 +881,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
}
// If DebugLoc does not match then two dbg.values are not identical.
if (isDebugValue())
- if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown()
- && getDebugLoc() != Other->getDebugLoc())
+ if (getDebugLoc() && Other->getDebugLoc() &&
+ getDebugLoc() != Other->getDebugLoc())
return false;
return true;
}
@@ -1619,12 +1619,9 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
}
if (isDebugValue() && MO.isMetadata()) {
// Pretty print DBG_VALUE instructions.
- const MDNode *MD = MO.getMetadata();
- DIDescriptor DI(MD);
- DIVariable DIV(MD);
-
- if (DI.isVariable() && !DIV.getName().empty())
- OS << "!\"" << DIV.getName() << '\"';
+ DIVariable DIV = dyn_cast<MDLocalVariable>(MO.getMetadata());
+ if (DIV && !DIV->getName().empty())
+ OS << "!\"" << DIV->getName() << '\"';
else
MO.print(OS, TRI);
} else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
@@ -1711,13 +1708,13 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
}
// Print debug location information.
- if (isDebugValue() && getOperand(e - 1).isMetadata()) {
+ if (isDebugValue() && getOperand(e - 2).isMetadata()) {
if (!HaveSemi) OS << ";";
- DIVariable DV(getOperand(e - 1).getMetadata());
- OS << " line no:" << DV.getLineNumber();
- if (MDNode *InlinedAt = DV.getInlinedAt()) {
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
- if (!InlinedAtDL.isUnknown() && MF) {
+ DIVariable DV = cast<MDLocalVariable>(getOperand(e - 2).getMetadata());
+ OS << " line no:" << DV->getLine();
+ if (auto *InlinedAt = debugLoc->getInlinedAt()) {
+ DebugLoc InlinedAtDL(InlinedAt);
+ if (InlinedAtDL && MF) {
OS << " inlined @[ ";
InlinedAtDL.print(OS);
OS << " ]";
@@ -1725,7 +1722,7 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
}
if (isIndirectDebugValue())
OS << " indirect";
- } else if (!debugLoc.isUnknown() && MF) {
+ } else if (debugLoc && MF) {
if (!HaveSemi) OS << ";";
OS << " dbg:";
debugLoc.print(OS);
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 2f65a2e..9756f13 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -10,10 +10,6 @@
// This pass performs loop invariant code motion on machine instructions. We
// attempt to remove as much code from the body of a loop as possible.
//
-// This pass does not attempt to throttle itself to limit register pressure.
-// The register allocation phases are expected to perform rematerialization
-// to recover when register pressure is high.
-//
// This pass is not intended to be a replacement or a complete alternative
// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
// constructs that are not exposed before lowering and instruction selection.
@@ -104,7 +100,7 @@ namespace {
SmallSet<unsigned, 32> RegSeen;
SmallVector<unsigned, 8> RegPressure;
- // Register pressure "limit" per register class. If the pressure
+ // Register pressure "limit" per register pressure set. If the pressure
// is higher than the limit, then it's considered high.
SmallVector<unsigned, 8> RegLimit;
@@ -214,7 +210,8 @@ namespace {
/// CanCauseHighRegPressure - Visit BBs from header to current BB,
/// check if hoisting an instruction of the given cost matrix can cause high
/// register pressure.
- bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap);
+ bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
+ bool Cheap);
/// UpdateBackTraceRegPressure - Traverse the back trace from header to
/// the current block and update their register pressures to reflect the
@@ -254,21 +251,25 @@ namespace {
/// if there is little to no overhead moving instructions into loops.
void SinkIntoLoop();
- /// getRegisterClassIDAndCost - For a given MI, register, and the operand
- /// index, return the ID and cost of its representative register class by
- /// reference.
- void getRegisterClassIDAndCost(const MachineInstr *MI,
- unsigned Reg, unsigned OpIdx,
- unsigned &RCId, unsigned &RCCost) const;
-
/// InitRegPressure - Find all virtual register references that are liveout
/// of the preheader to initialize the starting "register pressure". Note
/// this does not count live through (livein but not used) registers.
void InitRegPressure(MachineBasicBlock *BB);
+ /// calcRegisterCost - Calculate the additional register pressure that the
+ /// registers used in MI cause.
+ ///
+ /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
+ /// figure out which usages are live-ins.
+ /// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
+ DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
+ bool ConsiderSeen,
+ bool ConsiderUnseenAsDef);
+
/// UpdateRegPressure - Update estimate of register pressure after the
/// specified instruction.
- void UpdateRegPressure(const MachineInstr *MI);
+ void UpdateRegPressure(const MachineInstr *MI,
+ bool ConsiderUnseenAsDef = false);
/// ExtractHoistableLoad - Unfold a load from the given machineinstr if
/// the load itself could be hoisted. Return the unfolded and hoistable
@@ -354,13 +355,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
if (PreRegAlloc) {
// Estimate register pressure during pre-regalloc pass.
- unsigned NumRC = TRI->getNumRegClasses();
- RegPressure.resize(NumRC);
+ unsigned NumRPS = TRI->getNumRegPressureSets();
+ RegPressure.resize(NumRPS);
std::fill(RegPressure.begin(), RegPressure.end(), 0);
- RegLimit.resize(NumRC);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
+ RegLimit.resize(NumRPS);
+ for (unsigned i = 0, e = NumRPS; i != e; ++i)
+ RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
}
// Get our Loop information...
@@ -836,23 +836,6 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
}
-/// getRegisterClassIDAndCost - For a given MI, register, and the operand
-/// index, return the ID and cost of its representative register class.
-void
-MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
- unsigned Reg, unsigned OpIdx,
- unsigned &RCId, unsigned &RCCost) const {
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- MVT VT = *RC->vt_begin();
- if (VT == MVT::Untyped) {
- RCId = RC->getID();
- RCCost = 1;
- } else {
- RCId = TLI->getRepRegClassFor(VT)->getID();
- RCCost = TLI->getRepRegClassCostFor(VT);
- }
-}
-
/// InitRegPressure - Find all virtual register references that are liveout of
/// the preheader to initialize the starting "register pressure". Note this
/// does not count live through (livein but not used) registers.
@@ -870,41 +853,30 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
InitRegPressure(*BB->pred_begin());
}
- for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
- MII != E; ++MII) {
- MachineInstr *MI = &*MII;
- for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || MO.isImplicit())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- bool isNew = RegSeen.insert(Reg).second;
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
- if (MO.isDef())
- RegPressure[RCId] += RCCost;
- else {
- bool isKill = isOperandKill(MO, MRI);
- if (isNew && !isKill)
- // Haven't seen this, it must be a livein.
- RegPressure[RCId] += RCCost;
- else if (!isNew && isKill)
- RegPressure[RCId] -= RCCost;
- }
- }
- }
+ for (const MachineInstr &MI : *BB)
+ UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
}
/// UpdateRegPressure - Update estimate of register pressure after the
/// specified instruction.
-void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
- if (MI->isImplicitDef())
- return;
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
+ bool ConsiderUnseenAsDef) {
+ auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
+ for (const auto &RPIdAndCost : Cost) {
+ unsigned Class = RPIdAndCost.first;
+ if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
+ RegPressure[Class] = 0;
+ else
+ RegPressure[Class] += RPIdAndCost.second;
+ }
+}
- SmallVector<unsigned, 4> Defs;
+DenseMap<unsigned, int>
+MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
+ bool ConsiderUnseenAsDef) {
+ DenseMap<unsigned, int> Cost;
+ if (MI->isImplicitDef())
+ return Cost;
for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isImplicit())
@@ -913,27 +885,33 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- bool isNew = RegSeen.insert(Reg).second;
+ // FIXME: It seems bad to use RegSeen only for some of these calculations.
+ bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+
+ RegClassWeight W = TRI->getRegClassWeight(RC);
+ int RCCost = 0;
if (MO.isDef())
- Defs.push_back(Reg);
- else if (!isNew && isOperandKill(MO, MRI)) {
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
- if (RCCost > RegPressure[RCId])
- RegPressure[RCId] = 0;
+ RCCost = W.RegWeight;
+ else {
+ bool isKill = isOperandKill(MO, MRI);
+ if (isNew && !isKill && ConsiderUnseenAsDef)
+ // Haven't seen this, it must be a livein.
+ RCCost = W.RegWeight;
+ else if (!isNew && isKill)
+ RCCost = -W.RegWeight;
+ }
+ if (RCCost == 0)
+ continue;
+ const int *PS = TRI->getRegClassPressureSets(RC);
+ for (; *PS != -1; ++PS) {
+ if (Cost.find(*PS) == Cost.end())
+ Cost[*PS] = RCCost;
else
- RegPressure[RCId] -= RCCost;
+ Cost[*PS] += RCCost;
}
}
-
- unsigned Idx = 0;
- while (!Defs.empty()) {
- unsigned Reg = Defs.pop_back_val();
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost);
- RegPressure[RCId] += RCCost;
- ++Idx;
- }
+ return Cost;
}
/// isLoadFromGOTOrConstantPool - Return true if this machine instruction
@@ -1125,27 +1103,23 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
/// if hoisting an instruction of the given cost matrix can cause high
/// register pressure.
-bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
+bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
bool CheapInstr) {
- for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
- CI != CE; ++CI) {
- if (CI->second <= 0)
+ for (const auto &RPIdAndCost : Cost) {
+ if (RPIdAndCost.second <= 0)
continue;
- unsigned RCId = CI->first;
- unsigned Limit = RegLimit[RCId];
- int Cost = CI->second;
+ unsigned Class = RPIdAndCost.first;
+ int Limit = RegLimit[Class];
// Don't hoist cheap instructions if they would increase register pressure,
// even if we're under the limit.
if (CheapInstr && !HoistCheapInsts)
return true;
- for (unsigned i = BackTrace.size(); i != 0; --i) {
- SmallVectorImpl<unsigned> &RP = BackTrace[i-1];
- if (RP[RCId] + Cost >= Limit)
+ for (const auto &RP : BackTrace)
+ if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
return true;
- }
}
return false;
@@ -1155,46 +1129,15 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
/// current block and update their register pressures to reflect the effect
/// of hoisting MI from the current block to the preheader.
void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
- if (MI->isImplicitDef())
- return;
-
// First compute the 'cost' of the instruction, i.e. its contribution
// to register pressure.
- DenseMap<unsigned, int> Cost;
- for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || MO.isImplicit())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
- if (MO.isDef()) {
- DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
- if (CI != Cost.end())
- CI->second += RCCost;
- else
- Cost.insert(std::make_pair(RCId, RCCost));
- } else if (isOperandKill(MO, MRI)) {
- DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
- if (CI != Cost.end())
- CI->second -= RCCost;
- else
- Cost.insert(std::make_pair(RCId, -RCCost));
- }
- }
+ auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
+ /*ConsiderUnseenAsDef=*/false);
// Update register pressure of blocks from loop header to current block.
- for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
- SmallVectorImpl<unsigned> &RP = BackTrace[i];
- for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
- CI != CE; ++CI) {
- unsigned RCId = CI->first;
- RP[RCId] += CI->second;
- }
- }
+ for (auto &RP : BackTrace)
+ for (const auto &RPIdAndCost : Cost)
+ RP[RPIdAndCost.first] += RPIdAndCost.second;
}
/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
@@ -1229,15 +1172,8 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
if (TII->isTriviallyReMaterializable(&MI, AA))
return true;
- // Estimate register pressure to determine whether to LICM the instruction.
- // In low register pressure situation, we can be more aggressive about
- // hoisting. Also, favors hoisting long latency instructions even in
- // moderately high pressure situation.
- // Cheap instructions will only be hoisted if they don't increase register
- // pressure at all.
// FIXME: If there are long latency loop-invariant instructions inside the
// loop at this point, why didn't the optimizer's LICM hoist them?
- DenseMap<unsigned, int> Cost;
for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isImplicit())
@@ -1245,24 +1181,22 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
-
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
- if (MO.isDef()) {
- if (HasHighOperandLatency(MI, i, Reg)) {
- DEBUG(dbgs() << "Hoist High Latency: " << MI);
- ++NumHighLatency;
- return true;
- }
- Cost[RCId] += RCCost;
- } else if (isOperandKill(MO, MRI)) {
- // Is a virtual register use is a kill, hoisting it out of the loop
- // may actually reduce register pressure or be register pressure
- // neutral.
- Cost[RCId] -= RCCost;
+ if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
+ DEBUG(dbgs() << "Hoist High Latency: " << MI);
+ ++NumHighLatency;
+ return true;
}
}
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In low register pressure situation, we can be more aggressive about
+ // hoisting. Also, favors hoisting long latency instructions even in
+ // moderately high pressure situation.
+ // Cheap instructions will only be hoisted if they don't increase register
+ // pressure at all.
+ auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
+ /*ConsiderUnseenAsDef=*/false);
+
// Visit BBs from header to current BB, if hoisting this doesn't cause
// high register pressure, then it's safe to proceed.
if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
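
The MachineLICM changes above move the pressure bookkeeping from register classes to register pressure sets: calcRegisterCost spreads each operand's register-class weight across every pressure set in the -1-terminated list returned by getRegClassPressureSets. A generic, runnable sketch of that accumulation step with stand-in data (no LLVM types; the set numbers and weights are made up):

#include <iostream>
#include <map>

// Stand-in for TRI->getRegClassPressureSets(RC): a -1-terminated list of the
// pressure sets a register class contributes to.
static const int GPRPressureSets[] = {0, 2, -1};

// Add one register's weight into every pressure set of its class, mirroring
// the per-operand loop in calcRegisterCost.
static void addCost(std::map<int, int> &Cost, const int *PS, int RCCost) {
  for (; *PS != -1; ++PS)
    Cost[*PS] += RCCost; // operator[] default-initializes missing entries to 0
}

int main() {
  std::map<int, int> Cost;
  addCost(Cost, GPRPressureSets, +1); // a def: pressure goes up by the weight
  addCost(Cost, GPRPressureSets, -1); // a killed use: pressure goes back down
  for (const auto &Entry : Cost)
    std::cout << "pressure set " << Entry.first << ": " << Entry.second << '\n';
  return 0;
}
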
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index fca7df0..e8bd1f8 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
@@ -425,6 +426,12 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
Personalities.push_back(Personality);
}
+void MachineModuleInfo::addWinEHState(MachineBasicBlock *LandingPad,
+ int State) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.WinEHState = State;
+}
+
/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
///
void MachineModuleInfo::
@@ -563,10 +570,13 @@ const Function *MachineModuleInfo::getPersonality() const {
}
EHPersonality MachineModuleInfo::getPersonalityType() {
- if (PersonalityTypeCache == EHPersonality::Unknown)
- PersonalityTypeCache = classifyEHPersonality(getPersonality());
+ if (PersonalityTypeCache == EHPersonality::Unknown) {
+ if (const Function *F = getPersonality())
+ PersonalityTypeCache = classifyEHPersonality(F);
+ }
return PersonalityTypeCache;
}
+
/// getPersonalityIndex - Return unique index for current personality
/// function. NULL/first personality function should always get zero index.
unsigned MachineModuleInfo::getPersonalityIndex() const {
@@ -588,3 +598,18 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
// in the zero index.
return 0;
}
+
+const Function *MachineModuleInfo::getWinEHParent(const Function *F) const {
+ StringRef WinEHParentName =
+ F->getFnAttribute("wineh-parent").getValueAsString();
+ if (WinEHParentName.empty() || WinEHParentName == F->getName())
+ return F;
+ return F->getParent()->getFunction(WinEHParentName);
+}
+
+WinEHFuncInfo &MachineModuleInfo::getWinEHFuncInfo(const Function *F) {
+ auto &Ptr = FuncInfoMap[getWinEHParent(F)];
+ if (!Ptr)
+ Ptr.reset(new WinEHFuncInfo);
+ return *Ptr;
+}
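
getWinEHParent above resolves a function's Windows EH parent through the string-valued "wineh-parent" function attribute, falling back to the function itself when the attribute is empty or names the function. A minimal runnable sketch of setting and reading such an attribute; the module and function names are made up for illustration.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("wineh-sketch", Ctx);
  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
  Function *Outer =
      Function::Create(FTy, Function::ExternalLinkage, "outer", &M);
  Function *Handler =
      Function::Create(FTy, Function::InternalLinkage, "outer.catch", &M);

  // Tag the handler with a string attribute naming its parent function.
  Handler->addFnAttr("wineh-parent", "outer");

  // Resolve it the way getWinEHParent does: empty or self-referential means F.
  StringRef ParentName =
      Handler->getFnAttribute("wineh-parent").getValueAsString();
  const Function *Parent =
      (ParentName.empty() || ParentName == Handler->getName())
          ? Handler
          : M.getFunction(ParentName);
  outs() << "parent of " << Handler->getName() << " is " << Parent->getName()
         << "\n";
  (void)Outer;
  return 0;
}
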
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index a1c7e9f..22d519e 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -31,15 +31,14 @@ static int SortSymbolPair(const void *LHS, const void *RHS) {
return LHSS->getName().compare(RHSS->getName());
}
-/// GetSortedStubs - Return the entries from a DenseMap in a deterministic
-/// sorted orer.
-MachineModuleInfoImpl::SymbolListTy
-MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*,
- MachineModuleInfoImpl::StubValueTy>&Map) {
+MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs(
+ DenseMap<MCSymbol *, MachineModuleInfoImpl::StubValueTy> &Map) {
MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
if (!List.empty())
qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+
+ Map.clear();
return List;
}
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 7a3c80b..a52d05f 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1036,8 +1036,6 @@ void ScheduleDAGMILive::schedule() {
scheduleMI(SU, IsTopNode);
- updateQueues(SU, IsTopNode);
-
if (DFSResult) {
unsigned SubtreeID = DFSResult->getSubtreeID(SU);
if (!ScheduledTrees.test(SubtreeID)) {
@@ -1049,6 +1047,8 @@ void ScheduleDAGMILive::schedule() {
// Notify the scheduling strategy after updating the DAG.
SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
}
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 8aacd1f..5dc7c21 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -463,13 +463,11 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
// Run an upwards post-order search for the trace start.
Bounds.Downward = false;
Bounds.Visited.clear();
- typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
- for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
- I != E; ++I) {
+ for (auto I : inverse_post_order_ext(MBB, Bounds)) {
DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
// All the predecessors have been visited, pick the preferred one.
- TBI.Pred = pickTracePred(*I);
+ TBI.Pred = pickTracePred(I);
DEBUG({
if (TBI.Pred)
dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
@@ -477,19 +475,17 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
dbgs() << "null\n";
});
// The trace leading to I is now known, compute the depth resources.
- computeDepthResources(*I);
+ computeDepthResources(I);
}
// Run a downwards post-order search for the trace end.
Bounds.Downward = true;
Bounds.Visited.clear();
- typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
- for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
- I != E; ++I) {
+ for (auto I : post_order_ext(MBB, Bounds)) {
DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
// All the successors have been visited, pick the preferred one.
- TBI.Succ = pickTraceSucc(*I);
+ TBI.Succ = pickTraceSucc(I);
DEBUG({
if (TBI.Succ)
dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
@@ -497,7 +493,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
dbgs() << "null\n";
});
// The trace leaving I is now known, compute the height resources.
- computeHeightResources(*I);
+ computeHeightResources(I);
}
}
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index ad59fc9..55f08e4 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -141,7 +141,7 @@ namespace {
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);
- ~SchedulePostRATDList();
+ ~SchedulePostRATDList() override;
/// startBlock - Initialize register live-range state for scheduling in
/// this block.
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index e073e6a..5334a63 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/LLVMContext.h"
@@ -752,6 +753,25 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
if (!TFI.needsFrameIndexResolution(Fn)) return;
+ MachineModuleInfo &MMI = Fn.getMMI();
+ const Function *F = Fn.getFunction();
+ const Function *ParentF = MMI.getWinEHParent(F);
+ unsigned FrameReg;
+ if (F == ParentF) {
+ WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction());
+ // FIXME: This should be unconditional but we have bugs in the preparation
+ // pass.
+ if (FuncInfo.UnwindHelpFrameIdx != INT_MAX)
+ FuncInfo.UnwindHelpFrameOffset = TFI.getFrameIndexReferenceFromSP(
+ Fn, FuncInfo.UnwindHelpFrameIdx, FrameReg);
+ } else if (MMI.hasWinEHFuncInfo(F)) {
+ WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction());
+ auto I = FuncInfo.CatchHandlerParentFrameObjIdx.find(F);
+ if (I != FuncInfo.CatchHandlerParentFrameObjIdx.end())
+ FuncInfo.CatchHandlerParentFrameObjOffset[F] =
+ TFI.getFrameIndexReferenceFromSP(Fn, I->second, FrameReg);
+ }
+
// Store SPAdj at exit of a basic block.
SmallVector<int, 8> SPState;
SPState.resize(Fn.getNumBlockIDs());
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index c621414..c311c7b 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -301,13 +301,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
const MDNode *Expr = DBG->getDebugExpression();
bool IsIndirect = DBG->isIndirectDebugValue();
uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0;
- DebugLoc DL;
- if (MI == MBB->end()) {
- // If MI is at basic block end then use last instruction's location.
- MachineBasicBlock::iterator EI = MI;
- DL = (--EI)->getDebugLoc();
- } else
- DL = MI->getDebugLoc();
+ DebugLoc DL = DBG->getDebugLoc();
+ assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
MachineInstr *NewDV =
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE))
.addFrameIndex(FI)
@@ -877,6 +873,9 @@ void RAFast::AllocateBasicBlock() {
const MDNode *Expr = MI->getDebugExpression();
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock *MBB = MI->getParent();
+ assert(
+ cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL,
TII->get(TargetOpcode::DBG_VALUE))
.addFrameIndex(SS)
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index e94f1bb..26f42c9 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -538,8 +538,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Giant live ranges fall back to the global assignment heuristic, which
// prevents excessive spilling in pathological cases.
bool ReverseLocal = TRI->reverseLocalAssignment();
+ const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
bool ForceGlobal = !ReverseLocal &&
- (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs());
+ (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());
if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
@@ -552,10 +553,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Allocating bottom up may allow many short LRGs to be assigned first
// to one of the cheap registers. This could be much faster for very
// large blocks on targets with many physical registers.
- Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex());
+ Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
}
- }
- else {
+ Prio |= RC.AllocationPriority << 24;
+ } else {
// Allocate global and split ranges in long->short order. Long ranges that
// don't fit should be spilled (or split) ASAP so they don't create
// interference. Mark a bit to prioritize global above local ranges.
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 9e3cf41..5d958a6 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -2731,15 +2731,19 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
assert(Copy.isCopyLike());
if (!UseTerminalRule)
return false;
+ unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
+ isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
// Check if the destination of this copy has any other affinity.
- unsigned DstReg = Copy.getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ // If SrcReg is a physical register, the copy won't be coalesced.
+      // Ignoring it may have other side effects (like missing
+ // rematerialization). So keep it.
+ TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
!isTerminalReg(DstReg, Copy, MRI))
return false;
// DstReg is a terminal node. Check if it inteferes with any other
// copy involving SrcReg.
- unsigned SrcReg = Copy.getOperand(1).getReg();
const MachineBasicBlock *OrigBB = Copy.getParent();
const LiveInterval &DstLI = LIS->getInterval(DstReg);
for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
@@ -2751,9 +2755,11 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
// For now, just consider the copies that are in the same block.
if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
continue;
- unsigned OtherReg = MI.getOperand(0).getReg();
+ unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
+ isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
+ OtherSubReg);
if (OtherReg == SrcReg)
- OtherReg = MI.getOperand(1).getReg();
+ OtherReg = OtherSrcReg;
// Check if OtherReg is a non-terminal.
if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
isTerminalReg(OtherReg, MI, MRI))
@@ -2775,25 +2781,45 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
// yet, it might invalidate the iterator.
const unsigned PrevSize = WorkList.size();
if (JoinGlobalCopies) {
+ SmallVector<MachineInstr*, 2> LocalTerminals;
+ SmallVector<MachineInstr*, 2> GlobalTerminals;
// Coalesce copies bottom-up to coalesce local defs before local uses. They
// are not inherently easier to resolve, but slightly preferable until we
// have local live range splitting. In particular this is required by
// cmp+jmp macro fusion.
for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
MII != E; ++MII) {
- if (!MII->isCopyLike() || applyTerminalRule(*MII))
+ if (!MII->isCopyLike())
continue;
- if (isLocalCopy(&(*MII), LIS))
- LocalWorkList.push_back(&(*MII));
- else
- WorkList.push_back(&(*MII));
+ bool ApplyTerminalRule = applyTerminalRule(*MII);
+ if (isLocalCopy(&(*MII), LIS)) {
+ if (ApplyTerminalRule)
+ LocalTerminals.push_back(&(*MII));
+ else
+ LocalWorkList.push_back(&(*MII));
+ } else {
+ if (ApplyTerminalRule)
+ GlobalTerminals.push_back(&(*MII));
+ else
+ WorkList.push_back(&(*MII));
+ }
}
+ // Append the copies evicted by the terminal rule at the end of the list.
+ LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
+ WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
}
else {
+ SmallVector<MachineInstr*, 2> Terminals;
for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
MII != E; ++MII)
- if (MII->isCopyLike() && !applyTerminalRule(*MII))
- WorkList.push_back(MII);
+ if (MII->isCopyLike()) {
+ if (applyTerminalRule(*MII))
+ Terminals.push_back(&(*MII));
+ else
+ WorkList.push_back(MII);
+ }
+ // Append the copies evicted by the terminal rule at the end of the list.
+ WorkList.append(Terminals.begin(), Terminals.end());
}
// Try coalescing the collected copies immediately, and remove the nulls.
// This prevents the WorkList from getting too large since most copies are
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a1c84c5..22fd6d6 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -705,16 +705,8 @@ static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
EltVT.getSizeInBits() >= SplatBitSize);
}
-// \brief Returns the SDNode if it is a constant BuildVector or constant.
-static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
- if (isa<ConstantSDNode>(N))
- return N.getNode();
- BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
- if (BV && BV->isConstant())
- return BV;
- return nullptr;
-}
-
+// \brief Returns the SDNode if it is a constant integer BuildVector
+// or constant integer.
static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
return N.getNode();
@@ -723,6 +715,8 @@ static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) {
return nullptr;
}
+// \brief Returns the SDNode if it is a constant float BuildVector
+// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
if (isa<ConstantFPSDNode>(N))
return N.getNode();
@@ -773,8 +767,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue N0, SDValue N1) {
EVT VT = N0.getValueType();
if (N0.getOpcode() == Opc) {
- if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) {
+ if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+ if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R))
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
@@ -793,8 +787,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
}
if (N1.getOpcode() == Opc) {
- if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) {
- if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) {
+ if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
+ if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L))
return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
@@ -1583,8 +1577,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (add x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N1.getNode()))
@@ -1604,7 +1598,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
// fold (add x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -1624,8 +1619,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
N0C->getAPIntValue(), VT),
N0.getOperand(1));
// reassociate add
- SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
- if (RADD.getNode())
+ if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
return RADD;
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
@@ -1828,8 +1822,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N1.getNode()))
@@ -1984,26 +1978,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
APInt ConstValue0, ConstValue1;
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
} else {
- N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
- ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
- : APInt();
- N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
- ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
- : APInt();
+ N0IsConst = isa<ConstantSDNode>(N0);
+ if (N0IsConst)
+ ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
+ N1IsConst = isa<ConstantSDNode>(N1);
+ if (N1IsConst)
+ ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
}
// fold (mul c1, c2) -> c1*c2
if (N0IsConst && N1IsConst)
return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());
- // canonicalize constant to RHS
- if (N0IsConst && !N1IsConst)
+ // canonicalize constant to RHS (the vector constant need not be a splat)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1 == 0)
@@ -2083,8 +2078,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0.getOperand(1), N1));
// reassociate mul
- SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
- if (RMUL.getNode())
+ if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
return RMUL;
return SDValue();
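
visitADD and visitMUL now decide the constant-to-RHS canonicalization with isConstantIntBuildVectorOrConstantInt on both operands, so a constant BUILD_VECTOR no longer has to be a splat to get moved to the right-hand side. A standalone sketch of the swap itself, with a stand-in IsConstant flag instead of the real DAG predicate:

#include <cassert>
#include <utility>

struct Operand { bool IsConstant; int Value; };

// Commutative ops keep constants on the RHS so later folds only have to
// inspect one side.
static void canonicalizeConstantToRHS(Operand &LHS, Operand &RHS) {
  if (LHS.IsConstant && !RHS.IsConstant)
    std::swap(LHS, RHS);
}

int main() {
  Operand L{true, 7}, R{false, 0};
  canonicalizeConstantToRHS(L, R);
  assert(!L.IsConstant && R.IsConstant && R.Value == 7);
  return 0;
}
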
@@ -2096,10 +2090,9 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
EVT VT = N->getValueType(0);
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
@@ -2163,7 +2156,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
}
- // if integer divide is expensive and we satisfy the requirements, emit an
+ // If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
if (N1C && !TLI.isIntDivCheap()) {
SDValue Op = BuildSDIV(N);
@@ -2186,10 +2179,9 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
EVT VT = N->getValueType(0);
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
@@ -2459,8 +2451,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- // If the type twice as wide is legal, transform the mulhu to a wider multiply
- // plus a shift.
+ // If the type twice as wide is legal, transform the mulhu to a wider
+ // multiply plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
@@ -2489,8 +2481,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- // If the type twice as wide is legal, transform the mulhu to a wider multiply
- // plus a shift.
+ // If the type twice as wide is legal, transform the mulhu to a wider
+ // multiply plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
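
The comment above describes the MUL_LOHI lowering: when a type twice as wide is legal, the high half of the product comes from a wider multiply plus a shift. A self-contained check of that arithmetic for an unsigned 32-bit multiply-high; mulhu32 is an illustrative name, not an LLVM function:

#include <cassert>
#include <cstdint>

// High 32 bits of a 32x32 multiply, computed as a 64-bit multiply plus a
// shift -- the same shape the combiner emits when the wider type is legal.
static uint32_t mulhu32(uint32_t A, uint32_t B) {
  return static_cast<uint32_t>((static_cast<uint64_t>(A) * B) >> 32);
}

int main() {
  assert(mulhu32(0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFEu);
  assert(mulhu32(0x10000u, 0x10000u) == 1u);
  return 0;
}
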
@@ -2809,8 +2801,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
@@ -2839,7 +2831,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// fold (and x, -1) -> x
if (N1C && N1C->isAllOnesValue())
@@ -2850,8 +2843,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, VT);
// reassociate and
- SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1);
- if (RAND.getNode())
+ if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
return RAND;
// fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
@@ -3470,8 +3462,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (or x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
@@ -3556,7 +3548,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
// fold (or x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -3580,8 +3573,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return BSwap;
// reassociate or
- SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
- if (ROR.getNode())
+ if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
// iff (c1 & c2) == 0.
@@ -3875,8 +3867,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (xor x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
@@ -3899,14 +3891,14 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
// fold (xor x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
// reassociate xor
- SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
- if (RXOR.getNode())
+ if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
return RXOR;
// fold !(x cc y) -> (x !cc y)
@@ -4152,8 +4144,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold vector ops
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
// If setcc produces all-one true value then:
@@ -4332,8 +4324,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold vector ops
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
N1C = isConstOrConstSplat(N1);
}
@@ -4478,8 +4470,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold vector ops
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
N1C = isConstOrConstSplat(N1);
}
@@ -4889,7 +4881,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N1_0 = N1->getOperand(0);
SDValue N1_1 = N1->getOperand(1);
SDValue N1_2 = N1->getOperand(2);
- if (N1_2 == N2) {
+ if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
// Create the actual and node if we can generate good code for it.
if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
@@ -4908,7 +4900,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N2_0 = N2->getOperand(0);
SDValue N2_1 = N2->getOperand(1);
SDValue N2_2 = N2->getOperand(2);
- if (N2_1 == N1) {
+ if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
// Create the actual or node if we can generate good code for it.
if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
@@ -7037,7 +7029,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// Finally, this must be the case where we are shrinking elements: each input
// turns into multiple outputs.
- bool isS2V = ISD::isScalarToVector(BV);
unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
NumOutputsPerInput*BV->getNumOperands());
@@ -7055,10 +7046,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
APInt ThisVal = OpVal.trunc(DstBitSize);
Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
- if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
- // Simply turn this into a SCALAR_TO_VECTOR of the new type.
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
- Ops[0]);
OpVal = OpVal.lshr(DstBitSize);
}
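
With the SCALAR_TO_VECTOR shortcut removed, ConstantFoldBITCASTofBUILD_VECTOR always shrinks each wide constant element into NumOutputsPerInput narrow ones via the trunc/lshr loop above. A standalone sketch of that per-element arithmetic for one 32-bit value split into four bytes (low chunk emitted first; endianness handling is ignored here):

#include <cassert>
#include <cstdint>
#include <vector>

// Split one 32-bit constant into four 8-bit constants by truncating and
// shifting, mirroring the trunc/lshr loop above.
static std::vector<uint8_t> shrinkElement(uint32_t OpVal) {
  std::vector<uint8_t> Ops;
  for (unsigned j = 0; j != 4; ++j) {
    Ops.push_back(static_cast<uint8_t>(OpVal)); // trunc to DstBitSize
    OpVal >>= 8;                                // lshr by DstBitSize
  }
  return Ops;
}

int main() {
  std::vector<uint8_t> Parts = shrinkElement(0x11223344u);
  assert(Parts[0] == 0x44 && Parts[1] == 0x33 && Parts[2] == 0x22 &&
         Parts[3] == 0x11);
  return 0;
}
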
@@ -7206,10 +7193,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
@@ -7400,10 +7386,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
@@ -7552,15 +7537,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
// This just handles C1 * C2 for vectors. Other vector folds are below.
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- // Canonicalize vector constant to RHS.
- if (N0.getOpcode() == ISD::BUILD_VECTOR &&
- N1.getOpcode() != ISD::BUILD_VECTOR)
- if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0))
- if (BV0->isConstant())
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
}
// fold (fmul c1, c2) -> c1*c2
@@ -7568,7 +7546,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
// canonicalize constant to RHS
- if (N0CFP && !N1CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
// fold (fmul A, 1.0) -> A
@@ -7734,10 +7713,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
@@ -8216,11 +8194,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
return SDValue();
@@ -8228,11 +8205,10 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) {
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
return SDValue();
@@ -8240,11 +8216,10 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
return SDValue();
@@ -10080,19 +10055,19 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned EarliestNodeUsed = 0;
+ unsigned LatestNodeUsed = 0;
for (unsigned i=0; i < NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
+ // latest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
+ LatestNodeUsed = i;
}
- // The earliest Node in the DAG.
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ // The latest Node in the DAG.
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
SDValue StoredVal;
@@ -10151,17 +10126,17 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoredVal = DAG.getConstant(StoreInt, StoreTy);
}
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+ SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
false, false,
FirstInChain->getAlignment());
- // Replace the first store with the new store
- CombineTo(EarliestOp, NewStore);
+ // Replace the last store with the new store
+ CombineTo(LatestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem ; ++i) {
- if (StoreNodes[i].MemNode == EarliestOp)
+ if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
// ReplaceAllUsesWith will replace all uses that existed when it was
@@ -10538,18 +10513,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (NumElem < 2)
return false;
- // The earliest Node in the DAG.
- unsigned EarliestNodeUsed = 0;
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ // The latest Node in the DAG.
+ unsigned LatestNodeUsed = 0;
for (unsigned i=1; i<NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
+ // latest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
+ LatestNodeUsed = i;
}
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
+
// Find if it is better to use vectors or integers to load and store
// to memory.
EVT JointMemOpVT;
@@ -10571,7 +10547,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
false, false, false,
FirstLoad->getAlignment());
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
+ SDValue NewStore = DAG.getStore(LatestOp->getChain(), StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), false, false,
FirstInChain->getAlignment());
@@ -10589,12 +10565,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
}
- // Replace the first store with the new store.
- CombineTo(EarliestOp, NewStore);
+ // Replace the last store with the new store.
+ CombineTo(LatestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem ; ++i) {
// Remove all Store nodes.
- if (StoreNodes[i].MemNode == EarliestOp)
+ if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
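
Both store-merging paths now chain the new wide store to the latest of the merged stores, chosen as the entry whose SequenceNum compares smallest. A minimal sketch of that selection loop; MemOpLink here is only a stand-in with the one field the loop needs:

#include <cassert>
#include <vector>

struct MemOpLink { unsigned SequenceNum; };

// Pick the index of the store treated as the latest node in the chain:
// the one with the smallest SequenceNum.
static unsigned findLatestNodeUsed(const std::vector<MemOpLink> &StoreNodes) {
  unsigned LatestNodeUsed = 0;
  for (unsigned i = 1; i < StoreNodes.size(); ++i)
    if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
      LatestNodeUsed = i;
  return LatestNodeUsed;
}

int main() {
  std::vector<MemOpLink> Nodes = {{5}, {2}, {7}, {3}};
  assert(findLatestNodeUsed(Nodes) == 1);
  return 0;
}
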
@@ -11523,6 +11499,68 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
return SDValue();
}
+static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT OpVT = N->getOperand(0).getValueType();
+
+ // If the operands are legal vectors, leave them alone.
+ if (TLI.isTypeLegal(OpVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 8> Ops;
+
+ EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+
+ // Keep track of what we encounter.
+ bool AnyInteger = false;
+ bool AnyFP = false;
+ for (const SDValue &Op : N->ops()) {
+ if (ISD::BITCAST == Op.getOpcode() &&
+ !Op.getOperand(0).getValueType().isVector())
+ Ops.push_back(Op.getOperand(0));
+ else if (ISD::UNDEF == Op.getOpcode())
+ Ops.push_back(ScalarUndef);
+ else
+ return SDValue();
+
+ // Note whether we encounter an integer or floating point scalar.
+ // If it's neither, bail out; it could be something weird like x86mmx.
+ EVT LastOpVT = Ops.back().getValueType();
+ if (LastOpVT.isFloatingPoint())
+ AnyFP = true;
+ else if (LastOpVT.isInteger())
+ AnyInteger = true;
+ else
+ return SDValue();
+ }
+
+ // If any of the operands is a floating point scalar bitcast to a vector,
+ // use floating point types throughout, and bitcast everything.
+ // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
+ if (AnyFP) {
+ SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
+ ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+ if (AnyInteger) {
+ for (SDValue &Op : Ops) {
+ if (Op.getValueType() == SVT)
+ continue;
+ if (Op.getOpcode() == ISD::UNDEF)
+ Op = ScalarUndef;
+ else
+ Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
+ }
+ }
+ }
+
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+ VT.getSizeInBits() / SVT.getSizeInBits());
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
+}
+
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
// EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
@@ -11538,9 +11576,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
- // Optimize concat_vectors where one of the vectors is undef.
- if (N->getNumOperands() == 2 &&
- N->getOperand(1)->getOpcode() == ISD::UNDEF) {
+ // Optimize concat_vectors where all but the first of the vectors are undef.
+ if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
+ return Op.getOpcode() == ISD::UNDEF;
+ })) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
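
The concat_vectors fold above now fires whenever every operand after the first is undef, tested with std::all_of over the tail of the operand list. The same skip-the-first-element pattern in a standalone form:

#include <algorithm>
#include <cassert>
#include <vector>

// Check that every element after the first satisfies a predicate, the same
// std::all_of(std::next(begin), end, ...) shape used in the fold above.
// Assumes the container is non-empty.
static bool allTailElementsAreZero(const std::vector<int> &Ops) {
  return std::all_of(std::next(Ops.begin()), Ops.end(),
                     [](int Op) { return Op == 0; });
}

int main() {
  assert(allTailElementsAreZero({42, 0, 0, 0}));
  assert(!allTailElementsAreZero({42, 0, 1, 0}));
  return 0;
}
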
@@ -11548,6 +11587,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (In->getOpcode() == ISD::BITCAST &&
!In->getOperand(0)->getValueType(0).isVector()) {
SDValue Scalar = In->getOperand(0);
+
+ // If the bitcast type isn't legal, it might be a trunc of a legal type;
+ // look through the trunc so we can still do the transform:
+ // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
+ if (Scalar->getOpcode() == ISD::TRUNCATE &&
+ !TLI.isTypeLegal(Scalar.getValueType()) &&
+ TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
+ Scalar = Scalar->getOperand(0);
+
EVT SclTy = Scalar->getValueType(0);
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
@@ -11615,6 +11663,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
+ // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ if (SDValue V = combineConcatVectorOfScalars(N, DAG))
+ return V;
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
@@ -11676,7 +11728,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// type.
if (V->getOperand(0).getValueType() != NVT)
return SDValue();
- unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
"IDX in concat is not a multiple of the result vector length.");
@@ -12001,6 +12053,43 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return V;
}
+ // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+ // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+ SmallVector<SDValue, 8> Ops;
+ for (int M : SVN->getMask()) {
+ SDValue Op = DAG.getUNDEF(VT.getScalarType());
+ if (M >= 0) {
+ int Idx = M % NumElts;
+ SDValue &S = (M < (int)NumElts ? N0 : N1);
+ if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
+ Op = S.getOperand(Idx);
+ } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
+ if (Idx == 0)
+ Op = S.getOperand(0);
+ } else {
+ // Operand can't be combined - bail out.
+ break;
+ }
+ }
+ Ops.push_back(Op);
+ }
+ if (Ops.size() == VT.getVectorNumElements()) {
+ // BUILD_VECTOR requires all inputs to be of the same type; find the
+ // maximum type and extend them all.
+ EVT SVT = VT.getScalarType();
+ if (SVT.isInteger())
+ for (SDValue &Op : Ops)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT != VT.getScalarType())
+ for (SDValue &Op : Ops)
+ Op = TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
+ }
+ }
+
// If this shuffle only has a single input that is a bitcasted shuffle,
// attempt to merge the 2 shuffles and suitably bitcast the inputs/output
// back to their original types.
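
The shuffle combine added above gathers one scalar per mask lane from the two inputs (a BUILD_VECTOR operand, or element 0 of a SCALAR_TO_VECTOR) and rebuilds a single BUILD_VECTOR. A rough standalone model of that mask-driven gather; negative mask entries stand for undef lanes and none of this is LLVM API:

#include <cassert>
#include <vector>

// For each mask entry pick an element from the first or second source
// vector; a negative mask entry means "undef" (modelled as a sentinel).
static std::vector<int> shuffleToBuildVector(const std::vector<int> &N0,
                                             const std::vector<int> &N1,
                                             const std::vector<int> &Mask,
                                             int Undef = -1) {
  const int NumElts = static_cast<int>(N0.size());
  std::vector<int> Ops;
  for (int M : Mask) {
    if (M < 0) {
      Ops.push_back(Undef);
      continue;
    }
    int Idx = M % NumElts;
    Ops.push_back(M < NumElts ? N0[Idx] : N1[Idx]);
  }
  return Ops;
}

int main() {
  std::vector<int> Result =
      shuffleToBuildVector({10, 11, 12, 13}, {20, 21, 22, 23}, {0, 5, -1, 7});
  assert((Result == std::vector<int>{10, 21, -1, 23}));
  return 0;
}
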
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 223a149..5ffb826 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -425,7 +425,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
// Check if the second operand is a constant and handle it appropriately.
if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t Imm = CI->getZExtValue();
+ uint64_t Imm = CI->getSExtValue();
// Transform "sdiv exact X, 8" -> "sra X, 3".
if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
@@ -1079,11 +1079,16 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// The donothing intrinsic does, well, nothing.
case Intrinsic::donothing:
return true;
+ case Intrinsic::eh_actions: {
+ unsigned ResultReg = getRegForValue(UndefValue::get(II->getType()));
+ if (!ResultReg)
+ return false;
+ updateValueMap(II, ResultReg);
+ return true;
+ }
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
- DIVariable DIVar(DI->getVariable());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ DIVariable DIVar = DI->getVariable();
if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) {
DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
@@ -1124,6 +1129,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
false);
if (Op) {
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
if (Op->isReg()) {
Op->setIsDebug(true);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1148,6 +1155,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const DbgValueInst *DI = cast<DbgValueInst>(II);
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
const Value *V = DI->getValue();
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
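
selectBinaryOp above switches from getZExtValue() to getSExtValue() when reading the constant operand, which only matters for negative constants narrower than 64 bits. A standalone demonstration of how the two extensions diverge for a 32-bit -8:

#include <cassert>
#include <cstdint>

int main() {
  int32_t C = -8; // a 32-bit constant operand
  // Zero-extension loses the sign: the 64-bit value becomes 0xFFFFFFF8.
  uint64_t ZExt = static_cast<uint32_t>(C);
  // Sign-extension preserves it as a 64-bit -8.
  uint64_t SExt = static_cast<uint64_t>(static_cast<int64_t>(C));
  assert(ZExt == 0xFFFFFFF8u);
  assert(SExt == 0xFFFFFFFFFFFFFFF8u);
  return 0;
}
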
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 291b583..4b8ae32 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
@@ -79,12 +80,35 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) {
return ExtendKind;
}
+namespace {
+struct WinEHNumbering {
+ WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo), NextState(0) {}
+
+ WinEHFuncInfo &FuncInfo;
+ int NextState;
+
+ SmallVector<ActionHandler *, 4> HandlerStack;
+ SmallPtrSet<const Function *, 4> VisitedHandlers;
+
+ int currentEHNumber() const {
+ return HandlerStack.empty() ? -1 : HandlerStack.back()->getEHState();
+ }
+
+ void createUnwindMapEntry(int ToState, ActionHandler *AH);
+ void createTryBlockMapEntry(int TryLow, int TryHigh,
+ ArrayRef<CatchHandler *> Handlers);
+ void processCallSite(ArrayRef<ActionHandler *> Actions, ImmutableCallSite CS);
+ void calculateStateNumbers(const Function &F);
+};
+}
+
void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
SelectionDAG *DAG) {
Fn = &fn;
MF = &mf;
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
+ MachineModuleInfo &MMI = MF->getMMI();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
@@ -178,13 +202,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// during the initial isel pass through the IR so that it is done
// in a predictable order.
if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
- MachineModuleInfo &MMI = MF->getMMI();
- DIVariable DIVar(DI->getVariable());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
- if (MMI.hasDebugInfo() &&
- DIVar &&
- !DI->getDebugLoc().isUnknown()) {
+ DIVariable DIVar = DI->getVariable();
+ if (MMI.hasDebugInfo() && DIVar && DI->getDebugLoc()) {
// Don't handle byval struct arguments or VLAs, for example.
// Non-byval arguments are handled here (they refer to the stack
// temporary alloca at this point).
@@ -252,8 +271,179 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Mark landing pad blocks.
for (BB = Fn->begin(); BB != EB; ++BB)
- if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
+ if (const auto *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+
+ // Calculate EH numbers for WinEH.
+ if (fn.hasFnAttribute("wineh-parent")) {
+ const Function *WinEHParentFn = MMI.getWinEHParent(&fn);
+ WinEHFuncInfo &FI = MMI.getWinEHFuncInfo(WinEHParentFn);
+ if (FI.LandingPadStateMap.empty()) {
+ WinEHNumbering Num(FI);
+ Num.calculateStateNumbers(*WinEHParentFn);
+ // Pop everything on the handler stack.
+ Num.processCallSite(None, ImmutableCallSite());
+ }
+ }
+}
+
+void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) {
+ WinEHUnwindMapEntry UME;
+ UME.ToState = ToState;
+ if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH))
+ UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc());
+ else
+ UME.Cleanup = nullptr;
+ FuncInfo.UnwindMap.push_back(UME);
+}
+
+void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh,
+ ArrayRef<CatchHandler *> Handlers) {
+ WinEHTryBlockMapEntry TBME;
+ TBME.TryLow = TryLow;
+ TBME.TryHigh = TryHigh;
+ assert(TBME.TryLow <= TBME.TryHigh);
+ for (CatchHandler *CH : Handlers) {
+ WinEHHandlerType HT;
+ if (CH->getSelector()->isNullValue()) {
+ HT.Adjectives = 0x40;
+ HT.TypeDescriptor = nullptr;
+ } else {
+ auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts());
+ // Selectors are always pointers to GlobalVariables with 'struct' type.
+ // The struct has two fields, adjectives and a type descriptor.
+ auto *CS = cast<ConstantStruct>(GV->getInitializer());
+ HT.Adjectives =
+ cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue();
+ HT.TypeDescriptor =
+ cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts());
+ }
+ HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc());
+ HT.CatchObjRecoverIdx = CH->getExceptionVarIndex();
+ TBME.HandlerArray.push_back(HT);
+ }
+ FuncInfo.TryBlockMap.push_back(TBME);
+}
+
+static void print_name(const Value *V) {
+#ifndef NDEBUG
+ if (!V) {
+ DEBUG(dbgs() << "null");
+ return;
+ }
+
+ if (const auto *F = dyn_cast<Function>(V))
+ DEBUG(dbgs() << F->getName());
+ else
+ DEBUG(V->dump());
+#endif
+}
+
+void WinEHNumbering::processCallSite(ArrayRef<ActionHandler *> Actions,
+ ImmutableCallSite CS) {
+ int FirstMismatch = 0;
+ for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E;
+ ++FirstMismatch) {
+ if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() !=
+ Actions[FirstMismatch]->getHandlerBlockOrFunc())
+ break;
+ delete Actions[FirstMismatch];
+ }
+
+ bool EnteringScope = (int)Actions.size() > FirstMismatch;
+
+ // Don't recurse while we are looping over the handler stack. Instead, defer
+ // the numbering of the catch handlers until we are done popping.
+ SmallVector<CatchHandler *, 4> PoppedCatches;
+ for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) {
+ if (auto *CH = dyn_cast<CatchHandler>(HandlerStack.back())) {
+ PoppedCatches.push_back(CH);
+ } else {
+ // Delete cleanup handlers
+ delete HandlerStack.back();
+ }
+ HandlerStack.pop_back();
+ }
+
+ // We need to create a new state number if we are exiting a try scope and we
+ // will not push any more actions.
+ int TryHigh = NextState - 1;
+ if (!EnteringScope && !PoppedCatches.empty()) {
+ createUnwindMapEntry(currentEHNumber(), nullptr);
+ ++NextState;
+ }
+
+ int LastTryLowIdx = 0;
+ for (int I = 0, E = PoppedCatches.size(); I != E; ++I) {
+ CatchHandler *CH = PoppedCatches[I];
+ if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) {
+ int TryLow = CH->getEHState();
+ auto Handlers =
+ makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1);
+ createTryBlockMapEntry(TryLow, TryHigh, Handlers);
+ LastTryLowIdx = I + 1;
+ }
+ }
+
+ for (CatchHandler *CH : PoppedCatches) {
+ if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc()))
+ calculateStateNumbers(*F);
+ delete CH;
+ }
+
+ bool LastActionWasCatch = false;
+ for (size_t I = FirstMismatch; I != Actions.size(); ++I) {
+ // We can reuse eh states when pushing two catches for the same invoke.
+ bool CurrActionIsCatch = isa<CatchHandler>(Actions[I]);
+ // FIXME: Reenable this optimization!
+ if (CurrActionIsCatch && LastActionWasCatch && false) {
+ Actions[I]->setEHState(currentEHNumber());
+ } else {
+ createUnwindMapEntry(currentEHNumber(), Actions[I]);
+ Actions[I]->setEHState(NextState);
+ NextState++;
+ DEBUG(dbgs() << "Creating unwind map entry for: (");
+ print_name(Actions[I]->getHandlerBlockOrFunc());
+ DEBUG(dbgs() << ", " << currentEHNumber() << ")\n");
+ }
+ HandlerStack.push_back(Actions[I]);
+ LastActionWasCatch = CurrActionIsCatch;
+ }
+
+ DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: ");
+ print_name(CS ? CS.getCalledValue() : nullptr);
+ DEBUG(dbgs() << '\n');
+}
+
+void WinEHNumbering::calculateStateNumbers(const Function &F) {
+ auto I = VisitedHandlers.insert(&F);
+ if (!I.second)
+ return; // We've already visited this handler, don't renumber it.
+
+ DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n');
+ SmallVector<ActionHandler *, 4> ActionList;
+ for (const BasicBlock &BB : F) {
+ for (const Instruction &I : BB) {
+ const auto *CI = dyn_cast<CallInst>(&I);
+ if (!CI || CI->doesNotThrow())
+ continue;
+ processCallSite(None, CI);
+ }
+ const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
+ if (!II)
+ continue;
+ const LandingPadInst *LPI = II->getLandingPadInst();
+ auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
+ if (!ActionsCall)
+ continue;
+ assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions);
+ parseEHActions(ActionsCall, ActionList);
+ processCallSite(ActionList, II);
+ ActionList.clear();
+ FuncInfo.LandingPadStateMap[LPI] = currentEHNumber();
+ }
+
+ FuncInfo.CatchHandlerMaxState[&F] = NextState - 1;
}
/// clear - Clear out all the function-specific state. This returns this
@@ -462,8 +652,7 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
if (FT->isVarArg() && !MMI->usesVAFloatArgument()) {
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
Type* T = I.getArgOperand(i)->getType();
- for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
- i != e; ++i) {
+ for (auto i : post_order(T)) {
if (i->isFloatingPointTy()) {
MMI->setUsesVAFloatArgument(true);
return;
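
ComputeUsesVAFloatArgument now walks the type graph with the post_order(T) range adaptor instead of explicit po_begin/po_end iterators. A generic standalone sketch of the same wrap-a-begin/end-pair-into-a-range pattern; the names are illustrative and not the LLVM ADT API:

#include <cassert>
#include <vector>

// Minimal range adaptor: wrap a begin/end iterator pair so it can be used
// directly in a range-based for, the way post_order(T) wraps po_begin/po_end.
template <typename IterT> struct iterator_range_sketch {
  IterT BeginIt, EndIt;
  IterT begin() const { return BeginIt; }
  IterT end() const { return EndIt; }
};

template <typename IterT>
iterator_range_sketch<IterT> make_range_sketch(IterT B, IterT E) {
  return {B, E};
}

int main() {
  std::vector<int> V = {1, 2, 3};
  int Sum = 0;
  for (int X : make_range_sketch(V.begin(), V.end()))
    Sum += X;
  assert(Sum == 6);
  return 0;
}
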
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 93699a7..64d606a 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -650,6 +650,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
MDNode *Var = SD->getVariable();
MDNode *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
+ assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
if (SD->getKind() == SDDbgValue::FRAMEIX) {
// Stack address; this needs to be lowered in target-dependent fashion.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index ece38f3..4a28a4b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4033,6 +4033,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::SETCC) {
OVT = Node->getOperand(0).getSimpleValueType();
}
+ if (Node->getOpcode() == ISD::BR_CC)
+ OVT = Node->getOperand(2).getSimpleValueType();
MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
SDLoc dl(Node);
SDValue Tmp1, Tmp2, Tmp3;
@@ -4188,11 +4190,28 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp1, Tmp2, Node->getOperand(2)));
break;
}
+ case ISD::BR_CC: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(1))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3));
+ Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0),
+ Node->getOperand(0), Node->getOperand(1),
+ Tmp1, Tmp2, Node->getOperand(4)));
+ break;
+ }
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FCOPYSIGN:
case ISD::FPOW: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
@@ -4201,10 +4220,40 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp3, DAG.getIntPtrConstant(0)));
break;
}
- case ISD::FLOG2:
- case ISD::FEXP2:
+ case ISD::FMA: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
+ DAG.getIntPtrConstant(0)));
+ break;
+ }
+ case ISD::FPOWI: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(0)));
+ break;
+ }
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FROUND:
+ case ISD::FTRUNC:
+ case ISD::FNEG:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
case ISD::FLOG:
- case ISD::FEXP: {
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FABS:
+ case ISD::FEXP:
+ case ISD::FEXP2: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index cef3fc9..9de85d7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -593,6 +593,7 @@ private:
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
+ SDValue SplitVecOp_TruncateHelper(SDNode *N, unsigned TruncateOp);
SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
@@ -600,7 +601,6 @@ private:
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
- SDValue SplitVecOp_TRUNCATE(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 03c2734..408d5ed 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -379,8 +379,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
// There are currently two cases of vector promotion:
// 1) Bitcasting a vector of integers to a different type to a vector of the
- // same overall length. For example, x86 promotes ISD::AND on v2i32 to v1i64.
- // 2) Extending a vector of floats to a vector of the same number oflarger
+ // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
+ // 2) Extending a vector of floats to a vector of the same number of larger
// floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
MVT VT = Op.getSimpleValueType();
assert(Op.getNode()->getNumValues() == 1 &&
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f7e4557..f000902 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1293,7 +1293,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
- case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break;
+ case ISD::TRUNCATE:
+ Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE);
+ break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
@@ -1304,20 +1306,32 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
- case ISD::CTTZ:
- case ISD::CTLZ:
- case ISD::CTPOP:
- case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
+ Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE);
+ else
+ Res = SplitVecOp_UnaryOp(N);
+ break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- case ISD::FTRUNC:
+ if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
+ Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC);
+ else
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
Res = SplitVecOp_UnaryOp(N);
break;
+ case ISD::FTRUNC:
+ Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC);
+ break;
}
}
@@ -1581,7 +1595,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts);
}
-SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
+SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N,
+ unsigned TruncateOp) {
// The result type is legal, but the input type is illegal. If splitting
// ends up with the result type of each half still being legal, just
// do that. If, however, that would result in an illegal result type,
@@ -1624,8 +1639,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
NumElements/2);
- SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec);
- SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec);
+ SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
+ SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
// Concatenate them to get the full intermediate truncation result.
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
@@ -1634,7 +1649,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
// type. This should normally be something that ends up being legal directly,
// but in theory if a target has very wide vectors and an annoyingly
// restricted set of legal types, this split can chain to build things up.
- return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
+ return DAG.getNode(TruncateOp, DL, OutVT, InterVec);
}
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
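
SplitVecOp_TruncateHelper generalizes the old SplitVecOp_TRUNCATE: each half of the split input is narrowed to half-width elements with the node's own opcode, the halves are concatenated, and a final TruncateOp node produces the legal result type. A scalar-arithmetic sketch of the two-stage narrowing for the plain integer TRUNCATE case (64 -> 32 -> 16 bits per element), with illustrative names only:

#include <cassert>
#include <cstdint>
#include <vector>

// Two-stage narrowing per element: first to an intermediate half-width type,
// then to the final type, mirroring the split/truncate/concat/truncate flow.
static std::vector<uint16_t>
truncateInTwoSteps(const std::vector<uint64_t> &In) {
  std::vector<uint32_t> Inter; // intermediate half-width elements
  for (uint64_t V : In)
    Inter.push_back(static_cast<uint32_t>(V));
  std::vector<uint16_t> Out;   // final truncation to the result type
  for (uint32_t V : Inter)
    Out.push_back(static_cast<uint16_t>(V));
  return Out;
}

int main() {
  std::vector<uint16_t> R =
      truncateInTwoSteps({0x123456789ABCDEF0ull, 0xFFFFull});
  assert(R[0] == 0xDEF0 && R[1] == 0xFFFF);
  return 0;
}
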
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 8b54e656..fd0fa31 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -173,7 +173,7 @@ public:
HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
}
- ~ScheduleDAGRRList() {
+ ~ScheduleDAGRRList() override {
delete HazardRec;
delete AvailableQueue;
}
@@ -1423,9 +1423,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// If one or more successors has been unscheduled, then the current
// node is no longer available.
- if (!TrySU->isAvailable)
+ if (!TrySU->isAvailable || !TrySU->NodeQueueId)
CurSU = AvailableQueue->pop();
else {
+ // Available and in AvailableQueue
AvailableQueue->remove(TrySU);
CurSU = TrySU;
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 2cd1f4b..6351fa2 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -44,7 +44,7 @@ namespace llvm {
explicit ScheduleDAGSDNodes(MachineFunction &mf);
- virtual ~ScheduleDAGSDNodes() {}
+ ~ScheduleDAGSDNodes() override {}
/// Run - perform scheduling.
///
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 418b58e..eee4a4b 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -76,7 +76,7 @@ public:
HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
}
- ~ScheduleDAGVLIW() {
+ ~ScheduleDAGVLIW() override {
delete HazardRec;
delete AvailableQueue;
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b52f648..770f0b2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2851,10 +2851,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
// FIXME: Entirely reasonable to perform folding of other unary
// operations here as the need arises.
break;
+ case ISD::TRUNCATE:
+ // Constant build vector truncation can be done by rebuilding the vector
+ // from the original scalar operands at the truncated value type.
+ return getNode(ISD::BUILD_VECTOR, DL, VT, BV->ops());
case ISD::FNEG:
case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
case ISD::FP_EXTEND:
- case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
// Let the above scalar folding handle the folding of each element.
@@ -2870,6 +2876,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
}
if (Ops.size() == VT.getVectorNumElements())
return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ break;
}
}
}
@@ -3628,7 +3635,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
CSEMap.InsertNode(N, IP);
} else {
-
N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
}
@@ -3791,12 +3797,27 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT);
}
- Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+ assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?");
+ EVT IntVT = VT.getScalarType();
+ if (!IntVT.isInteger())
+ IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits());
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value);
if (NumBits > 8) {
// Use a multiplication with 0x010101... to extend the input to the
// required length.
APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
- Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
+ Value = DAG.getNode(ISD::MUL, dl, IntVT, Value,
+ DAG.getConstant(Magic, IntVT));
+ }
+
+ if (VT != Value.getValueType() && !VT.isInteger())
+ Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value);
+ if (VT != Value.getValueType()) {
+ assert(VT.getVectorElementType() == Value.getValueType() &&
+ "value type should be one vector element here");
+ SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value);
+ Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps);
}
return Value;
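
getMemsetValue still widens the fill byte by multiplying with the 0x0101... splat constant; the change above just keeps that arithmetic in an integer type and only afterwards bitcasts or builds a vector when VT is a vector. A standalone check of the magic-multiply step for a 32-bit fill:

#include <cassert>
#include <cstdint>

// Replicate one fill byte across 32 bits with the same multiply-by-0x01010101
// trick used above (APInt::getSplat of 0x01 in the real code).
static uint32_t splatByte32(uint8_t Fill) {
  return static_cast<uint32_t>(Fill) * 0x01010101u;
}

int main() {
  assert(splatByte32(0xAB) == 0xABABABABu);
  assert(splatByte32(0x00) == 0x00000000u);
  return 0;
}
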
@@ -4276,7 +4297,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol, bool AlwaysInline,
- MachinePointerInfo DstPtrInfo,
+ bool isTailCall, MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
@@ -4334,15 +4355,16 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
- std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
- unsigned Align, bool isVol,
+ unsigned Align, bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
@@ -4389,15 +4411,16 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
- std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
- unsigned Align, bool isVol,
+ unsigned Align, bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
@@ -4446,7 +4469,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
+ .setDiscardResult()
+ .setTailCall(isTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
@@ -5574,8 +5598,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
/// For IROrder, we keep the smaller of the two
SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) {
DebugLoc NLoc = N->getDebugLoc();
- if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) &&
- (OLoc.getDebugLoc() != NLoc)) {
+ if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
N->setDebugLoc(DebugLoc());
}
unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
@@ -5885,6 +5908,8 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
unsigned R, bool IsIndirect, uint64_t Off,
DebugLoc DL, unsigned O) {
+ assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);
}
@@ -5892,6 +5917,8 @@ SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
const Value *C, uint64_t Off,
DebugLoc DL, unsigned O) {
+ assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O);
}
@@ -5899,6 +5926,8 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,
unsigned FI, uint64_t Off,
DebugLoc DL, unsigned O) {
+ assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6c14e79..32d2aae 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -997,14 +998,16 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
const DbgValueInst *DI = DDI.getDI();
DebugLoc dl = DDI.getdl();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
- MDNode *Variable = DI->getVariable();
- MDNode *Expr = DI->getExpression();
+ MDLocalVariable *Variable = DI->getVariable();
+ MDExpression *Expr = DI->getExpression();
+ assert(Variable->isValidLocationForIntrinsic(dl) &&
+ "Expected inlined-at fields to agree");
uint64_t Offset = DI->getOffset();
// A dbg.value for an alloca is always indirect.
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
SDDbgValue *SDV;
if (Val.getNode()) {
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect,
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect,
Val)) {
SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(),
IsIndirect, Offset, dl, DbgSDNodeOrder);
@@ -4447,11 +4450,9 @@ static unsigned getTruncatedArgReg(const SDValue &N) {
/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
/// argument, create the corresponding DBG_VALUE machine instruction for it now.
/// At the end of instruction selection, they will be inserted to the entry BB.
-bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V,
- MDNode *Variable,
- MDNode *Expr, int64_t Offset,
- bool IsIndirect,
- const SDValue &N) {
+bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
+ const Value *V, MDLocalVariable *Variable, MDExpression *Expr,
+ MDLocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
@@ -4460,8 +4461,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V,
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
// Ignore inlined function arguments here.
+ //
+ // FIXME: Should we be checking DL->inlinedAt() to determine this?
DIVariable DV(Variable);
- if (DV.isInlinedFnArgument(MF.getFunction()))
+ if (!DV->getScope()->getSubprogram()->describes(MF.getFunction()))
return false;
Optional<MachineOperand> Op;
@@ -4502,13 +4505,15 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V,
if (!Op)
return false;
+ assert(Variable->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
if (Op->isReg())
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE),
- IsIndirect, Op->getReg(), Offset, Variable, Expr));
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ Op->getReg(), Offset, Variable, Expr));
else
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE))
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
.addOperand(*Op)
.addImm(Offset)
.addMetadata(Variable)
@@ -4589,9 +4594,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!Align)
Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
- DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false,
- MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1))));
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ false, isTC,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ updateDAGForMaybeTailCall(MC);
return nullptr;
}
case Intrinsic::memset: {
@@ -4608,8 +4616,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!Align)
Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
- DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
- MachinePointerInfo(I.getArgOperand(0))));
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ isTC, MachinePointerInfo(I.getArgOperand(0)));
+ updateDAGForMaybeTailCall(MS);
return nullptr;
}
case Intrinsic::memmove: {
@@ -4628,19 +4638,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!Align)
Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
- DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
- MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1))));
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ isTC, MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ updateDAGForMaybeTailCall(MM);
return nullptr;
}
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
- MDNode *Variable = DI.getVariable();
- MDNode *Expression = DI.getExpression();
+ MDLocalVariable *Variable = DI.getVariable();
+ MDExpression *Expression = DI.getExpression();
const Value *Address = DI.getAddress();
- DIVariable DIVar(Variable);
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ DIVariable DIVar = Variable;
if (!Address || !DIVar) {
DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return nullptr;
@@ -4663,7 +4673,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Address = BCI->getOperand(0);
// Parameters are handled specially.
bool isParameter =
- (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
+ (DIVariable(Variable)->getTag() == dwarf::DW_TAG_arg_variable ||
isa<Argument>(Address));
const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
@@ -4677,7 +4687,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
else {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N);
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
+ N);
return nullptr;
}
} else if (AI)
@@ -4694,7 +4705,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false,
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
N)) {
// If variable is pinned by an alloca in dominating bb then
// use StaticAllocaMap.
@@ -4717,14 +4728,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
- DIVariable DIVar(DI.getVariable());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgValueInst should be either null or a DIVariable.");
+ DIVariable DIVar = DI.getVariable();
if (!DIVar)
return nullptr;
- MDNode *Variable = DI.getVariable();
- MDNode *Expression = DI.getExpression();
+ MDLocalVariable *Variable = DI.getVariable();
+ MDExpression *Expression = DI.getExpression();
uint64_t Offset = DI.getOffset();
const Value *V = DI.getValue();
if (!V)
@@ -4745,7 +4754,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (N.getNode()) {
// A dbg.value for an alloca is always indirect.
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
- if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset,
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset,
IsIndirect, N)) {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
IsIndirect, Offset, dl, SDNodeOrder);
@@ -5360,6 +5369,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::clear_cache:
return TLI.getClearCacheBuiltinName();
+ case Intrinsic::eh_actions:
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ return nullptr;
case Intrinsic::donothing:
// ignore
return nullptr;
@@ -5397,14 +5409,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Directly emit some FRAME_ALLOC machine instrs. Label assignment emission
// is the same on all targets.
for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
- AllocaInst *Slot =
- cast<AllocaInst>(I.getArgOperand(Idx)->stripPointerCasts());
+ Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(Arg))
+ continue; // Skip null pointers. They represent a hole in index space.
+ AllocaInst *Slot = cast<AllocaInst>(Arg);
assert(FuncInfo.StaticAllocaMap.count(Slot) &&
"can only escape static allocas");
int FI = FuncInfo.StaticAllocaMap[Slot];
MCSymbol *FrameAllocSym =
- MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName(),
- Idx);
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
+ GlobalValue::getRealLinkageName(MF.getName()), Idx);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
TII->get(TargetOpcode::FRAME_ALLOC))
.addSym(FrameAllocSym)
@@ -5424,8 +5438,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX));
MCSymbol *FrameAllocSym =
- MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName(),
- IdxVal);
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
+ GlobalValue::getRealLinkageName(Fn->getName()), IdxVal);
// Create a TargetExternalSymbol for the label to avoid any target lowering
// that would make this PC relative.
@@ -5446,16 +5460,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_begincatch:
case Intrinsic::eh_endcatch:
llvm_unreachable("begin/end catch intrinsics not lowered in codegen");
- case Intrinsic::eh_unwindhelp: {
- AllocaInst *Slot =
- cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
- assert(FuncInfo.StaticAllocaMap.count(Slot) &&
- "can only use static allocas with llvm.eh.unwindhelp");
- int FI = FuncInfo.StaticAllocaMap[Slot];
- // TODO: Save this in the not-yet-existant WinEHFuncInfo struct.
- (void)FI;
- return nullptr;
- }
}
}
@@ -5546,6 +5550,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Skip the first return-type Attribute to get to params.
Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
Args.push_back(Entry);
+
+ // If we have an explicit sret argument that is an Instruction, (i.e., it
+ // might point to function-local memory), we can't meaningfully tail-call.
+ if (Entry.isSRet && isa<Instruction>(V))
+ isTailCall = false;
}
// Check if target-independent constraints permit a tail call here.
@@ -7183,6 +7192,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
+
+ // sret demotion isn't compatible with tail-calls, since the sret argument
+ // points into the caller's stack frame.
+ CLI.IsTailCall = false;
} else {
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
@@ -7790,3 +7803,17 @@ MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
return nullptr;
return I;
}
+
+/// During lowering, new call nodes can be created (such as memset, etc.).
+/// Those will become new roots of the current DAG, but complications arise
+/// when they are tail calls. In such cases, the call lowering will update
+/// the root, but the builder still needs to know that a tail call has been
+/// lowered in order to avoid generating an additional return.
+void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
+ // If the node is null, we do have a tail call.
+ if (MaybeTC.getNode() != nullptr)
+ DAG.setRoot(MaybeTC);
+ else
+ HasTailCall = true;
+}
+
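The memcpy/memset/memmove hunks above all funnel through the new updateDAGForMaybeTailCall helper. Below is a minimal standalone model of that bookkeeping, not LLVM code; the null-node-means-tail-call contract and the HasTailCall flag are taken from the hunks above, all other names are illustrative.

#include <cassert>
#include <optional>

// Minimal stand-in for the builder state touched by updateDAGForMaybeTailCall.
struct BuilderModel {
  std::optional<int> Root;  // stands in for the DAG root chain
  bool HasTailCall = false; // suppresses the extra return when set

  // Mirrors the helper: a non-null chain becomes the new root; a null chain
  // means the libcall was emitted as a tail call.
  void updateForMaybeTailCall(std::optional<int> MaybeTC) {
    if (MaybeTC)
      Root = MaybeTC;
    else
      HasTailCall = true;
  }
};

int main() {
  BuilderModel B;
  B.updateForMaybeTailCall(7);            // ordinary memcpy lowering
  assert(B.Root == 7 && !B.HasTailCall);
  B.updateForMaybeTailCall(std::nullopt); // memcpy lowered as a tail call
  assert(B.HasTailCall);
  return 0;
}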
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 30240d8..a27f470 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -397,7 +397,6 @@ private:
StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr),
FailureMBB(nullptr), Guard(nullptr),
GuardReg(0) { }
- ~StackProtectorDescriptor() { }
/// Returns true if all fields of the stack protector descriptor are
/// initialized implying that we should/are ready to emit a stack protector.
@@ -823,12 +822,17 @@ private:
/// EmitFuncArgumentDbgValue - If V is an function argument then create
/// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
- bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr,
+ bool EmitFuncArgumentDbgValue(const Value *V, MDLocalVariable *Variable,
+ MDExpression *Expr, MDLocation *DL,
int64_t Offset, bool IsIndirect,
const SDValue &N);
/// Return the next block after MBB, or nullptr if there is none.
MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
+
+ /// Update the DAG and DAG builder with the relevant information after
+ /// a new root node has been created which could be a tail call.
+ void updateDAGForMaybeTailCall(SDValue MaybeTC);
};
} // end namespace llvm
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 5898da4..636c0a7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -520,22 +520,20 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getNodeId() != -1)
OS << " [ID=" << getNodeId() << ']';
- DebugLoc dl = getDebugLoc();
- if (G && !dl.isUnknown()) {
- DIScope
- Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
- OS << " dbg:";
- assert((!Scope || Scope.isScope()) &&
- "Scope of a DebugLoc should be null or a DIScope.");
- // Omit the directory, since it's usually long and uninteresting.
- if (Scope)
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << dl.getLine();
- if (dl.getCol() != 0)
- OS << ':' << dl.getCol();
- }
+ if (!G)
+ return;
+
+ MDLocation *L = getDebugLoc();
+ if (!L)
+ return;
+
+ if (auto *Scope = L->getScope())
+ OS << Scope->getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << L->getLine();
+ if (unsigned C = L->getColumn())
+ OS << ':' << C;
}
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
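The dumper change above reads the location straight off the MDLocation attached to the node (getScope(), getLine(), getColumn()). A small self-contained sketch of the same formatting rule, with a hypothetical Loc struct standing in for MDLocation:

#include <iostream>

// Hypothetical stand-in for MDLocation, only to show the output format.
struct Loc {
  const char *Filename; // null models a location with no scope
  unsigned Line;
  unsigned Column;
};

// Mirrors print_details(): filename (or "<unknown>"), ":line", and ":col"
// only when the column is nonzero.
static void printLoc(std::ostream &OS, const Loc &L) {
  if (L.Filename)
    OS << L.Filename;
  else
    OS << "<unknown>";
  OS << ':' << L.Line;
  if (unsigned C = L.Column)
    OS << ':' << C;
  OS << '\n';
}

int main() {
  printLoc(std::cout, {"foo.c", 42, 7}); // foo.c:42:7
  printLoc(std::cout, {"foo.c", 42, 0}); // foo.c:42
  printLoc(std::cout, {nullptr, 1, 0});  // <unknown>:1
  return 0;
}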
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 4d2af3f..1e116dd 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
@@ -500,12 +501,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MachineBasicBlock::iterator InsertPos = Def;
const MDNode *Variable = MI->getDebugVariable();
const MDNode *Expr = MI->getDebugExpression();
+ DebugLoc DL = MI->getDebugLoc();
bool IsIndirect = MI->isIndirectDebugValue();
unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
+ assert(cast<MDLocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
// Def is never a terminator here, so it is ok to increment InsertPos.
- BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
- TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset,
- Variable, Expr);
+ BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE),
+ IsIndirect, LDI->second, Offset, Variable, Expr);
// If this vreg is directly copied into an exported register then
// that COPY instructions also need DBG_VALUE, if it is the only
@@ -523,9 +526,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
CopyUseMI = nullptr; break;
}
if (CopyUseMI) {
+ // Use MI's debug location, which describes where Variable was
+ // declared, rather than whatever is attached to CopyUseMI.
MachineInstr *NewMI =
- BuildMI(*MF, CopyUseMI->getDebugLoc(),
- TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr);
MachineBasicBlock::iterator Pos = CopyUseMI;
EntryMBB->insertAfter(Pos, NewMI);
@@ -670,8 +674,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#endif
{
BlockNumber = FuncInfo->MBB->getNumber();
- BlockName = MF->getName().str() + ":" +
- FuncInfo->MBB->getBasicBlock()->getName().str();
+ BlockName =
+ (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
}
DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
@@ -929,53 +933,74 @@ void SelectionDAGISel::PrepareEHLandingPad() {
const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst();
MF->getMMI().addPersonality(
MBB, cast<Function>(LPadInst->getPersonalityFn()->stripPointerCasts()));
- if (MF->getMMI().getPersonalityType() == EHPersonality::MSVC_Win64SEH) {
- // Make virtual registers and a series of labels that fill in values for the
- // clauses.
- auto &RI = MF->getRegInfo();
- FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC);
+ EHPersonality Personality = MF->getMMI().getPersonalityType();
- // Get all invoke BBs that will unwind into the clause BBs.
+ if (isMSVCEHPersonality(Personality)) {
+ SmallVector<MachineBasicBlock *, 4> ClauseBBs;
+ const IntrinsicInst *Actions =
+ dyn_cast<IntrinsicInst>(LLVMBB->getFirstInsertionPt());
+ // Get all invoke BBs that unwind to this landingpad.
SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(),
MBB->pred_end());
+ if (Actions && Actions->getIntrinsicID() == Intrinsic::eh_actions) {
+ // If this is a call to llvm.eh.actions followed by indirectbr, then we've
+ // run WinEHPrepare, and we should remove this block from the machine CFG.
+ // Mark the targets of the indirectbr as landingpads instead.
+ for (const BasicBlock *LLVMSucc : successors(LLVMBB)) {
+ MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc];
+ // Add the edge from the invoke to the clause.
+ for (MachineBasicBlock *InvokeBB : InvokeBBs)
+ InvokeBB->addSuccessor(ClauseBB);
+ }
+ } else {
+ // Otherwise, we haven't done the preparation, and we need to invent some
+ // clause basic blocks that branch into the landingpad.
+ // FIXME: Remove this code once SEH preparation works.
+
+ // Make virtual registers and a series of labels that fill in values for
+ // the clauses.
+ auto &RI = MF->getRegInfo();
+ FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC);
+
+ // Emit separate machine basic blocks with separate labels for each clause
+ // before the main landing pad block.
+ MachineInstrBuilder SelectorPHI = BuildMI(
+ *MBB, MBB->begin(), SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::PHI), FuncInfo->ExceptionSelectorVirtReg);
+ for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) {
+ // Skip filter clauses, we can't implement them.
+ if (LPadInst->isFilter(I))
+ continue;
- // Emit separate machine basic blocks with separate labels for each clause
- // before the main landing pad block.
- MachineInstrBuilder SelectorPHI = BuildMI(
- *MBB, MBB->begin(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::PHI),
- FuncInfo->ExceptionSelectorVirtReg);
- for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) {
- // Skip filter clauses, we can't implement them yet.
- if (LPadInst->isFilter(I))
- continue;
-
- MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB);
- MF->insert(MBB, ClauseBB);
+ MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB);
+ MF->insert(MBB, ClauseBB);
- // Add the edge from the invoke to the clause.
- for (MachineBasicBlock *InvokeBB : InvokeBBs)
- InvokeBB->addSuccessor(ClauseBB);
+ // Add the edge from the invoke to the clause.
+ for (MachineBasicBlock *InvokeBB : InvokeBBs)
+ InvokeBB->addSuccessor(ClauseBB);
- // Mark the clause as a landing pad or MI passes will delete it.
- ClauseBB->setIsLandingPad();
+ // Mark the clause as a landing pad or MI passes will delete it.
+ ClauseBB->setIsLandingPad();
- GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I));
+ GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I));
- // Start the BB with a label.
- MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB);
- BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II)
- .addSym(ClauseLabel);
+ // Start the BB with a label.
+ MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB);
+ BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II)
+ .addSym(ClauseLabel);
- // Construct a simple BB that defines a register with the typeid constant.
- FuncInfo->MBB = ClauseBB;
- FuncInfo->InsertPt = ClauseBB->end();
- unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB);
- CurDAG->setRoot(SDB->getRoot());
- SDB->clear();
- CodeGenAndEmitDAG();
+ // Construct a simple BB that defines a register with the typeid
+ // constant.
+ FuncInfo->MBB = ClauseBB;
+ FuncInfo->InsertPt = ClauseBB->end();
+ unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
- // Add the typeid virtual register to the phi in the main landing pad.
- SelectorPHI.addReg(VReg).addMBB(ClauseBB);
+ // Add the typeid virtual register to the phi in the main landing pad.
+ SelectorPHI.addReg(VReg).addMBB(ClauseBB);
+ }
}
// Remove the edge from the invoke to the lpad.
@@ -986,6 +1011,12 @@ void SelectionDAGISel::PrepareEHLandingPad() {
// pad block.
FuncInfo->MBB = MBB;
FuncInfo->InsertPt = MBB->end();
+
+ // Transfer EH state number assigned to the IR block to the MBB.
+ if (Personality == EHPersonality::MSVC_CXX) {
+ WinEHFuncInfo &FI = MF->getMMI().getWinEHFuncInfo(MF->getFunction());
+ MF->getMMI().addWinEHState(MBB, FI.LandingPadStateMap[LPadInst]);
+ }
return;
}
@@ -1165,7 +1196,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = 0;
FuncInfo->ExceptionSelectorVirtReg = 0;
- if (FuncInfo->MBB->isLandingPad())
+ if (LLVMBB->isLandingPad())
PrepareEHLandingPad();
// Before doing SelectionDAG ISel, see if FastISel has been requested.
@@ -2552,7 +2583,7 @@ public:
SelectionDAG::DAGUpdateListener(DAG),
RecordedNodes(RN), MatchScopes(MS) { }
- void NodeDeleted(SDNode *N, SDNode *E) {
+ void NodeDeleted(SDNode *N, SDNode *E) override {
// Some early-returns here to avoid the search if we deleted the node or
// if the update comes from MorphNodeTo (MorphNodeTo is the last thing we
// do, so it's unnecessary to update matching state at that point).
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 3cc7a98..a9ffa72 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -397,10 +397,11 @@ static void lowerIncomingStatepointValue(SDValue Incoming,
Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
Ops.push_back(Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
- // This handles allocas as arguments to the statepoint
- const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
- Ops.push_back(
- Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy()));
+ // This handles allocas as arguments to the statepoint (this is only
+ // really meaningful for a deopt value; for GC, we'd be trying to
+ // relocate the address of the alloca itself?)
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
+ Incoming.getValueType()));
} else {
// Otherwise, locate a spill slot and explicitly spill it so it
// can be found by the runtime later. We currently do not support
@@ -441,27 +442,25 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// heap. This is basically just here to help catch errors during statepoint
// insertion. TODO: This should actually be in the Verifier, but we can't get
// to the GCStrategy from there (yet).
- if (Builder.GFI) {
- GCStrategy &S = Builder.GFI->getStrategy();
- for (const Value *V : Bases) {
- auto Opt = S.isGCManagedPointer(V);
- if (Opt.hasValue()) {
- assert(Opt.getValue() &&
- "non gc managed base pointer found in statepoint");
- }
+ GCStrategy &S = Builder.GFI->getStrategy();
+ for (const Value *V : Bases) {
+ auto Opt = S.isGCManagedPointer(V);
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() &&
+ "non gc managed base pointer found in statepoint");
}
- for (const Value *V : Ptrs) {
- auto Opt = S.isGCManagedPointer(V);
- if (Opt.hasValue()) {
- assert(Opt.getValue() &&
- "non gc managed derived pointer found in statepoint");
- }
+ }
+ for (const Value *V : Ptrs) {
+ auto Opt = S.isGCManagedPointer(V);
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() &&
+ "non gc managed derived pointer found in statepoint");
}
- for (const Value *V : Relocations) {
- auto Opt = S.isGCManagedPointer(V);
- if (Opt.hasValue()) {
- assert(Opt.getValue() && "non gc managed pointer relocated");
- }
+ }
+ for (const Value *V : Relocations) {
+ auto Opt = S.isGCManagedPointer(V);
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() && "non gc managed pointer relocated");
}
}
#endif
@@ -523,6 +522,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
SDValue Incoming = Builder.getValue(V);
lowerIncomingStatepointValue(Incoming, Ops, Builder);
}
+
+ // If there are any explicit spill slots passed to the statepoint, record
+ // them, but otherwise do not do anything special. These are user-provided
+ // allocas and give control over placement to the consumer. In this case,
+ // it is the contents of the slot which may get updated, not the pointer to
+ // the alloca.
+ for (Value *V : StatepointSite.gc_args()) {
+ SDValue Incoming = Builder.getValue(V);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+ // This handles allocas as arguments to the statepoint
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
+ Incoming.getValueType()));
+
+ }
+ }
}
void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
@@ -565,10 +579,8 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
// TODO: This if should become an assert. For now, we allow the GCStrategy
// to be optional for backwards compatibility. This will only last a short
// period (i.e. a couple of weeks).
- if (GFI) {
- assert(GFI->getStrategy().useStatepoints() &&
- "GCStrategy does not expect to encounter statepoints");
- }
+ assert(GFI->getStrategy().useStatepoints() &&
+ "GCStrategy does not expect to encounter statepoints");
#endif
// Lower statepoint vmstate and gcstate arguments
@@ -614,7 +626,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
// Add a leading constant argument with the Flags and the calling convention
// masked together
CallingConv::ID CallConv = CS.getCallingConv();
- int Flags = dyn_cast<ConstantInt>(CS.getArgument(2))->getZExtValue();
+ int Flags = cast<ConstantInt>(CS.getArgument(2))->getZExtValue();
assert(Flags == 0 && "not expected to be used");
Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
Ops.push_back(
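For context on the assert-only hunk above: with Builder.GFI now assumed non-null, every base, derived, and relocated pointer is checked against the GC strategy whenever the strategy expresses an opinion. A rough standalone model of that tri-state check (names are illustrative, not LLVM's):

#include <cassert>
#include <optional>
#include <vector>

// Stand-in for GCStrategy::isGCManagedPointer: no value means "don't know",
// true/false is a definite answer.
using ManagedQuery = std::optional<bool>;

static void checkAllManaged(const std::vector<ManagedQuery> &Answers) {
  for (const ManagedQuery &Opt : Answers) {
    if (Opt.has_value())
      assert(*Opt && "non gc managed pointer found in statepoint");
  }
}

int main() {
  // "don't know" and "managed" both pass; a definite "not managed" would
  // trip the assert, mirroring the statepoint sanity checks above.
  checkAllManaged({std::nullopt, true, true});
  return 0;
}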
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index 66a6a3c..7c0b2bb 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -239,7 +239,7 @@ Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
Constant *GEPIndices[2] = {
ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)};
- return ConstantExpr::getGetElementPtr(GV, GEPIndices);
+ return ConstantExpr::getGetElementPtr(FrameMap->getType(), GV, GEPIndices);
}
Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) {
@@ -249,7 +249,7 @@ Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) {
for (size_t I = 0; I != Roots.size(); I++)
EltTys.push_back(Roots[I].second->getAllocatedType());
- return StructType::create(EltTys, "gc_stackentry." + F.getName().str());
+ return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str());
}
/// doInitialization - If this module uses the GC intrinsics, find them now. If
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 2335a88..0635173 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -46,6 +46,8 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
class SjLjEHPrepare : public FunctionPass {
const TargetMachine *TM;
+ Type *doubleUnderDataTy;
+ Type *doubleUnderJBufTy;
Type *FunctionContextTy;
Constant *RegisterFn;
Constant *UnregisterFn;
@@ -93,12 +95,14 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
// builtin_setjmp uses a five word jbuf
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
Type *Int32Ty = Type::getInt32Ty(M.getContext());
- FunctionContextTy = StructType::get(VoidPtrTy, // __prev
- Int32Ty, // call_site
- ArrayType::get(Int32Ty, 4), // __data
- VoidPtrTy, // __personality
- VoidPtrTy, // __lsda
- ArrayType::get(VoidPtrTy, 5), // __jbuf
+ doubleUnderDataTy = ArrayType::get(Int32Ty, 4);
+ doubleUnderJBufTy = ArrayType::get(VoidPtrTy, 5);
+ FunctionContextTy = StructType::get(VoidPtrTy, // __prev
+ Int32Ty, // call_site
+ doubleUnderDataTy, // __data
+ VoidPtrTy, // __personality
+ VoidPtrTy, // __lsda
+ doubleUnderJBufTy, // __jbuf
nullptr);
RegisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
@@ -204,16 +208,17 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
// Reference the __data field.
- Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data");
+ Value *FCData =
+ Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data");
// The exception values come back in context->__data[0].
- Value *ExceptionAddr =
- Builder.CreateConstGEP2_32(FCData, 0, 0, "exception_gep");
+ Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
+ 0, 0, "exception_gep");
Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
- Value *SelectorAddr =
- Builder.CreateConstGEP2_32(FCData, 0, 1, "exn_selector_gep");
+ Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
+ 0, 1, "exn_selector_gep");
Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
substituteLPadValues(LPI, ExnVal, SelVal);
@@ -223,15 +228,16 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
IRBuilder<> Builder(EntryBB->getTerminator());
if (!PersonalityFn)
PersonalityFn = LPads[0]->getPersonalityFn();
- Value *PersonalityFieldPtr =
- Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep");
+ Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(
+ FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep");
Builder.CreateStore(
Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()),
PersonalityFieldPtr, /*isVolatile=*/true);
// LSDA address
Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr");
- Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep");
+ Value *LSDAFieldPtr =
+ Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 4, "lsda_gep");
Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true);
return FuncCtx;
@@ -400,16 +406,19 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
IRBuilder<> Builder(EntryBB->getTerminator());
// Get a reference to the jump buffer.
- Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep");
+ Value *JBufPtr =
+ Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep");
// Save the frame pointer.
- Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep");
+ Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0,
+ "jbuf_fp_gep");
Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);
// Save the stack pointer.
- Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep");
+ Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2,
+ "jbuf_sp_gep");
Val = Builder.CreateCall(StackAddrFn, "sp");
Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index 622361e..03dd58d 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -70,7 +70,7 @@ public:
static char ID; // Pass identification, replacement for typeid.
SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {}
- ~SpillPlacement() { releaseMemory(); }
+ ~SpillPlacement() override { releaseMemory(); }
/// BorderConstraint - A basic block has separate constraints for entry and
/// exit.
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 7572803..2bf2d64 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -464,7 +464,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
continue;
if (SlotRemap.count(VI.Slot)) {
DEBUG(dbgs() << "Remapping debug info for ["
- << DIVariable(VI.Var).getName() << "].\n");
+ << cast<MDLocalVariable>(VI.Var)->getName() << "].\n");
VI.Slot = SlotRemap[VI.Slot];
FixedDbg++;
}
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 58a6d52..2162a51 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1246,20 +1246,13 @@ void TargetLoweringBase::computeRegisterProperties(
ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
}
- // Decide how to handle f32. If the target does not have native support for
- // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ // Decide how to handle f32. If the target does not have native f32 support,
+ // expand it to i32 and we will be generating soft float library calls.
if (!isTypeLegal(MVT::f32)) {
- if (isTypeLegal(MVT::f64)) {
- NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
- RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
- TransformToType[MVT::f32] = MVT::f64;
- ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
- } else {
- NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
- RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
- TransformToType[MVT::f32] = MVT::i32;
- ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
- }
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
}
if (!isTypeLegal(MVT::f16)) {
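The f32 hunk above removes the promote-to-f64 path: an illegal f32 is now always softened to i32 soft-float libcalls. A tiny model of the old versus new decision (the enum names are illustrative, not the real MVT or LegalizeTypeAction values):

#include <cassert>

enum class Action { Legal, PromoteToF64, SoftenToI32 };

// Old rule: prefer promoting f32 to a legal f64, else soften to i32.
static Action classifyF32Old(bool F32Legal, bool F64Legal) {
  if (F32Legal) return Action::Legal;
  return F64Legal ? Action::PromoteToF64 : Action::SoftenToI32;
}

// New rule (this patch): if f32 is illegal, always soften to i32 libcalls.
static Action classifyF32New(bool F32Legal, bool /*F64Legal*/) {
  return F32Legal ? Action::Legal : Action::SoftenToI32;
}

int main() {
  // The behavioral change only shows on targets with legal f64 but no f32.
  assert(classifyF32Old(false, true) == Action::PromoteToF64);
  assert(classifyF32New(false, true) == Action::SoftenToI32);
  return 0;
}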
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index bcf2aa7..5b795e4 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -245,9 +245,11 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro";
}
-static const MCSectionELF *selectELFSectionForGlobal(
- MCContext &Ctx, const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags) {
+static const MCSectionELF *
+selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection,
+ unsigned Flags, unsigned *NextUniqueID) {
unsigned EntrySize = 0;
if (Kind.isMergeableCString()) {
if (Kind.isMergeable2ByteCString()) {
@@ -297,9 +299,13 @@ static const MCSectionELF *selectELFSectionForGlobal(
Name.push_back('.');
TM.getNameWithPrefix(Name, GV, Mang, true);
}
+ unsigned UniqueID = ~0;
+ if (EmitUniqueSection && !UniqueSectionNames) {
+ UniqueID = *NextUniqueID;
+ (*NextUniqueID)++;
+ }
return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
- EntrySize, Group,
- EmitUniqueSection && !UniqueSectionNames);
+ EntrySize, Group, UniqueID);
}
const MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
@@ -319,7 +325,7 @@ const MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
EmitUniqueSection |= GV->hasComdat();
return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM,
- EmitUniqueSection, Flags);
+ EmitUniqueSection, Flags, &NextUniqueID);
}
const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
@@ -332,7 +338,8 @@ const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
return ReadOnlySection;
return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
- Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC);
+ Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC,
+ &NextUniqueID);
}
bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
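The ELF section-selection hunks above replace the boolean "unique" flag with an explicit ID drawn from a per-object NextUniqueID counter. A small standalone model of that allocation rule (the ~0 sentinel and the counter bump follow the hunk; everything else is illustrative):

#include <cassert>

// ~0 plays the role of "not a unique section" in the hunk above.
static const unsigned NotUnique = ~0U;

static unsigned selectUniqueID(bool EmitUniqueSection, bool UniqueSectionNames,
                               unsigned &NextUniqueID) {
  unsigned UniqueID = NotUnique;
  // Only sections that must be unique but cannot get a unique *name*
  // consume an ID from the counter.
  if (EmitUniqueSection && !UniqueSectionNames) {
    UniqueID = NextUniqueID;
    ++NextUniqueID;
  }
  return UniqueID;
}

int main() {
  unsigned Next = 0;
  unsigned A = selectUniqueID(true, false, Next);  // first unnamed unique section
  unsigned B = selectUniqueID(true, false, Next);  // second one gets a fresh ID
  unsigned C = selectUniqueID(false, false, Next); // ordinary section
  unsigned D = selectUniqueID(true, true, Next);   // unique names instead of IDs
  assert(A == 0 && B == 1 && C == NotUnique && D == NotUnique && Next == 2);
  (void)A; (void)B; (void)C; (void)D;
  return 0;
}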
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index ab0f96e..35b944e 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -20,6 +20,8 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -33,6 +35,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <memory>
using namespace llvm;
@@ -49,14 +52,15 @@ namespace {
// frame allocation structure.
typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap;
-typedef SmallSet<BasicBlock *, 4> VisitedBlockSet;
+// TinyPtrVector cannot hold nullptr, so we need our own sentinel that isn't
+// quite null.
+AllocaInst *getCatchObjectSentinel() {
+ return static_cast<AllocaInst *>(nullptr) + 1;
+}
-enum ActionType { Catch, Cleanup };
+typedef SmallSet<BasicBlock *, 4> VisitedBlockSet;
class LandingPadActions;
-class ActionHandler;
-class CatchHandler;
-class CleanupHandler;
class LandingPadMap;
typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy;
@@ -66,7 +70,7 @@ class WinEHPrepare : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
WinEHPrepare(const TargetMachine *TM = nullptr)
- : FunctionPass(ID) {}
+ : FunctionPass(ID), DT(nullptr) {}
bool runOnFunction(Function &Fn) override;
@@ -81,31 +85,62 @@ public:
private:
bool prepareExceptionHandlers(Function &F,
SmallVectorImpl<LandingPadInst *> &LPads);
+ void promoteLandingPadValues(LandingPadInst *LPad);
+ void completeNestedLandingPad(Function *ParentFn,
+ LandingPadInst *OutlinedLPad,
+ const LandingPadInst *OriginalLPad,
+ FrameVarInfoMap &VarInfo);
bool outlineHandler(ActionHandler *Action, Function *SrcFn,
LandingPadInst *LPad, BasicBlock *StartBB,
FrameVarInfoMap &VarInfo);
+ void addStubInvokeToHandlerIfNeeded(Function *Handler, Value *PersonalityFn);
void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions);
CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB,
VisitedBlockSet &VisitedBlocks);
- CleanupHandler *findCleanupHandler(BasicBlock *StartBB, BasicBlock *EndBB);
+ void findCleanupHandlers(LandingPadActions &Actions, BasicBlock *StartBB,
+ BasicBlock *EndBB);
void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB);
// All fields are reset by runOnFunction.
+ DominatorTree *DT;
EHPersonality Personality;
CatchHandlerMapTy CatchHandlerMap;
CleanupHandlerMapTy CleanupHandlerMap;
- DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps;
+ DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps;
+
+ // This maps landing pad instructions found in outlined handlers to
+ // the landing pad instruction in the parent function from which they
+ // were cloned. The cloned/nested landing pad is used as the key
+ // because the landing pad may be cloned into multiple handlers.
+ // This map will be used to add the llvm.eh.actions call to the nested
+ // landing pads after all handlers have been outlined.
+ DenseMap<LandingPadInst *, const LandingPadInst *> NestedLPtoOriginalLP;
+
+ // This maps blocks in the parent function which are destinations of
+ // catch handlers to cloned blocks in (other) outlined handlers. This
+ // handles the case where a nested landing pad has a catch handler that
+ // returns to a handler function rather than the parent function.
+ // The original block is used as the key here because there should only
+ // ever be one handler function from which the cloned block is not pruned.
+ // The original block will be pruned from the parent function after all
+ // handlers have been outlined. This map will be used to adjust the
+ // return instructions of handlers which return to the block that was
+ // outlined into a handler. This is done after all handlers have been
+ // outlined but before the outlined code is pruned from the parent function.
+ DenseMap<const BasicBlock *, BasicBlock *> LPadTargetBlocks;
};
class WinEHFrameVariableMaterializer : public ValueMaterializer {
public:
WinEHFrameVariableMaterializer(Function *OutlinedFn,
FrameVarInfoMap &FrameVarInfo);
- ~WinEHFrameVariableMaterializer() {}
+ ~WinEHFrameVariableMaterializer() override {}
+
+ Value *materializeValueFor(Value *V) override;
- virtual Value *materializeValueFor(Value *V) override;
+ void escapeCatchObject(Value *V);
private:
FrameVarInfoMap &FrameVarInfo;
@@ -119,42 +154,23 @@ public:
bool isInitialized() { return OriginLPad != nullptr; }
- bool mapIfEHPtrLoad(const LoadInst *Load) {
- return mapIfEHLoad(Load, EHPtrStores, EHPtrStoreAddrs);
- }
- bool mapIfSelectorLoad(const LoadInst *Load) {
- return mapIfEHLoad(Load, SelectorStores, SelectorStoreAddrs);
- }
-
+ bool isOriginLandingPadBlock(const BasicBlock *BB) const;
bool isLandingPadSpecificInst(const Instruction *Inst) const;
- void remapSelector(ValueToValueMapTy &VMap, Value *MappedValue) const;
+ void remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue,
+ Value *SelectorValue) const;
private:
- bool mapIfEHLoad(const LoadInst *Load,
- SmallVectorImpl<const StoreInst *> &Stores,
- SmallVectorImpl<const Value *> &StoreAddrs);
-
const LandingPadInst *OriginLPad;
// We will normally only see one of each of these instructions, but
// if more than one occurs for some reason we can handle that.
TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs;
TinyPtrVector<const ExtractValueInst *> ExtractedSelectors;
-
- // In optimized code, there will typically be at most one instance of
- // each of the following, but in unoptimized IR it is not uncommon
- // for the values to be stored, loaded and then stored again. In that
- // case we will create a second entry for each store and store address.
- SmallVector<const StoreInst *, 2> EHPtrStores;
- SmallVector<const StoreInst *, 2> SelectorStores;
- SmallVector<const Value *, 2> EHPtrStoreAddrs;
- SmallVector<const Value *, 2> SelectorStoreAddrs;
};
class WinEHCloningDirectorBase : public CloningDirector {
public:
- WinEHCloningDirectorBase(Function *HandlerFn,
- FrameVarInfoMap &VarInfo,
+ WinEHCloningDirectorBase(Function *HandlerFn, FrameVarInfoMap &VarInfo,
LandingPadMap &LPadMap)
: Materializer(HandlerFn, VarInfo),
SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())),
@@ -180,6 +196,9 @@ public:
virtual CloningAction handleResume(ValueToValueMapTy &VMap,
const ResumeInst *Resume,
BasicBlock *NewBB) = 0;
+ virtual CloningAction handleLandingPad(ValueToValueMapTy &VMap,
+ const LandingPadInst *LPad,
+ BasicBlock *NewBB) = 0;
ValueMaterializer *getValueMaterializer() override { return &Materializer; }
@@ -192,11 +211,13 @@ protected:
class WinEHCatchDirector : public WinEHCloningDirectorBase {
public:
- WinEHCatchDirector(Function *CatchFn, Value *Selector,
- FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap)
+ WinEHCatchDirector(
+ Function *CatchFn, Value *Selector, FrameVarInfoMap &VarInfo,
+ LandingPadMap &LPadMap,
+ DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPads)
: WinEHCloningDirectorBase(CatchFn, VarInfo, LPadMap),
CurrentSelector(Selector->stripPointerCasts()),
- ExceptionObjectVar(nullptr) {}
+ ExceptionObjectVar(nullptr), NestedLPtoOriginalLP(NestedLPads) {}
CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
const Instruction *Inst,
@@ -210,21 +231,28 @@ public:
BasicBlock *NewBB) override;
CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume,
BasicBlock *NewBB) override;
+ CloningAction handleLandingPad(ValueToValueMapTy &VMap,
+ const LandingPadInst *LPad,
+ BasicBlock *NewBB) override;
- const Value *getExceptionVar() { return ExceptionObjectVar; }
+ Value *getExceptionVar() { return ExceptionObjectVar; }
TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
private:
Value *CurrentSelector;
- const Value *ExceptionObjectVar;
+ Value *ExceptionObjectVar;
TinyPtrVector<BasicBlock *> ReturnTargets;
+
+ // This will be a reference to the field of the same name in the WinEHPrepare
+ // object which instantiates this WinEHCatchDirector object.
+ DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPtoOriginalLP;
};
class WinEHCleanupDirector : public WinEHCloningDirectorBase {
public:
- WinEHCleanupDirector(Function *CleanupFn,
- FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap)
+ WinEHCleanupDirector(Function *CleanupFn, FrameVarInfoMap &VarInfo,
+ LandingPadMap &LPadMap)
: WinEHCloningDirectorBase(CleanupFn, VarInfo, LPadMap) {}
CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
@@ -239,66 +267,9 @@ public:
BasicBlock *NewBB) override;
CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume,
BasicBlock *NewBB) override;
-};
-
-class ActionHandler {
-public:
- ActionHandler(BasicBlock *BB, ActionType Type)
- : StartBB(BB), Type(Type), HandlerBlockOrFunc(nullptr) {}
-
- ActionType getType() const { return Type; }
- BasicBlock *getStartBlock() const { return StartBB; }
-
- bool hasBeenProcessed() { return HandlerBlockOrFunc != nullptr; }
-
- void setHandlerBlockOrFunc(Constant *F) { HandlerBlockOrFunc = F; }
- Constant *getHandlerBlockOrFunc() { return HandlerBlockOrFunc; }
-
-private:
- BasicBlock *StartBB;
- ActionType Type;
-
- // Can be either a BlockAddress or a Function depending on the EH personality.
- Constant *HandlerBlockOrFunc;
-};
-
-class CatchHandler : public ActionHandler {
-public:
- CatchHandler(BasicBlock *BB, Constant *Selector, BasicBlock *NextBB)
- : ActionHandler(BB, ActionType::Catch), Selector(Selector),
- NextBB(NextBB), ExceptionObjectVar(nullptr) {}
-
- // Method for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ActionHandler *H) {
- return H->getType() == ActionType::Catch;
- }
-
- Constant *getSelector() const { return Selector; }
- BasicBlock *getNextBB() const { return NextBB; }
-
- const Value *getExceptionVar() { return ExceptionObjectVar; }
- TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
-
- void setExceptionVar(const Value *Val) { ExceptionObjectVar = Val; }
- void setReturnTargets(TinyPtrVector<BasicBlock *> &Targets) {
- ReturnTargets = Targets;
- }
-
-private:
- Constant *Selector;
- BasicBlock *NextBB;
- const Value *ExceptionObjectVar;
- TinyPtrVector<BasicBlock *> ReturnTargets;
-};
-
-class CleanupHandler : public ActionHandler {
-public:
- CleanupHandler(BasicBlock *BB) : ActionHandler(BB, ActionType::Cleanup) {}
-
- // Method for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ActionHandler *H) {
- return H->getType() == ActionType::Cleanup;
- }
+ CloningAction handleLandingPad(ValueToValueMapTy &VMap,
+ const LandingPadInst *LPad,
+ BasicBlock *NewBB) override;
};
class LandingPadActions {
@@ -313,6 +284,7 @@ public:
bool includesCleanup() const { return HasCleanupHandlers; }
+ SmallVectorImpl<ActionHandler *> &actions() { return Actions; }
SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); }
SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); }
@@ -336,8 +308,8 @@ FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) {
// FIXME: Remove this once the backend can handle the prepared IR.
static cl::opt<bool>
-SEHPrepare("sehprepare", cl::Hidden,
- cl::desc("Prepare functions with SEH personalities"));
+ SEHPrepare("sehprepare", cl::Hidden,
+ cl::desc("Prepare functions with SEH personalities"));
bool WinEHPrepare::runOnFunction(Function &Fn) {
SmallVector<LandingPadInst *, 4> LPads;
@@ -360,6 +332,8 @@ bool WinEHPrepare::runOnFunction(Function &Fn) {
if (!isMSVCEHPersonality(Personality))
return false;
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
if (isAsynchronousEHPersonality(Personality) && !SEHPrepare) {
// Replace all resume instructions with unreachable.
// FIXME: Remove this once the backend can handle the prepared IR.
@@ -375,11 +349,11 @@ bool WinEHPrepare::runOnFunction(Function &Fn) {
return true;
}
-bool WinEHPrepare::doFinalization(Module &M) {
- return false;
-}
+bool WinEHPrepare::doFinalization(Module &M) { return false; }
-void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {}
+void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+}
bool WinEHPrepare::prepareExceptionHandlers(
Function &F, SmallVectorImpl<LandingPadInst *> &LPads) {
@@ -422,9 +396,14 @@ bool WinEHPrepare::prepareExceptionHandlers(
if (LPadHasActionList)
continue;
+ // If either of the values in the aggregate returned by the landing pad is
+ // extracted and stored to memory, promote the stored value to a register.
+ promoteLandingPadValues(LPad);
+
LandingPadActions Actions;
mapLandingPadBlocks(LPad, Actions);
+ HandlersOutlined |= !Actions.actions().empty();
for (ActionHandler *Action : Actions) {
if (Action->hasBeenProcessed())
continue;
@@ -436,24 +415,17 @@ bool WinEHPrepare::prepareExceptionHandlers(
if (isAsynchronousEHPersonality(Personality)) {
if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
processSEHCatchHandler(CatchAction, StartBB);
- HandlersOutlined = true;
continue;
}
}
- if (outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo)) {
- HandlersOutlined = true;
- }
- } // End for each Action
-
- // FIXME: We need a guard against partially outlined functions.
- if (!HandlersOutlined)
- continue;
+ outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo);
+ }
// Replace the landing pad with a new llvm.eh.action based landing pad.
BasicBlock *NewLPadBB = BasicBlock::Create(Context, "lpad", &F, LPadBB);
assert(!isa<PHINode>(LPadBB->begin()));
- Instruction *NewLPad = LPad->clone();
+ auto *NewLPad = cast<LandingPadInst>(LPad->clone());
NewLPadBB->getInstList().push_back(NewLPad);
while (!pred_empty(LPadBB)) {
auto *pred = *pred_begin(LPadBB);
@@ -461,6 +433,19 @@ bool WinEHPrepare::prepareExceptionHandlers(
Invoke->setUnwindDest(NewLPadBB);
}
+ // If anyone is still using the old landingpad value, just give them undef
+ // instead. The eh pointer and selector values are not real.
+ LPad->replaceAllUsesWith(UndefValue::get(LPad->getType()));
+
+ // Replace the mapping of any nested landing pad that previously mapped
+ // to this landing pad with a referenced to the cloned version.
+ for (auto &LPadPair : NestedLPtoOriginalLP) {
+ const LandingPadInst *OriginalLPad = LPadPair.second;
+ if (OriginalLPad == LPad) {
+ LPadPair.second = NewLPad;
+ }
+ }
+
// Replace uses of the old lpad in phis with this block and delete the old
// block.
LPadBB->replaceSuccessorsPhiUsesWith(NewLPadBB);
@@ -474,11 +459,17 @@ bool WinEHPrepare::prepareExceptionHandlers(
if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
ActionArgs.push_back(ConstantInt::get(Int32Type, 1));
ActionArgs.push_back(CatchAction->getSelector());
+ // Find the frame escape index of the exception object alloca in the
+ // parent.
+ int FrameEscapeIdx = -1;
Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar());
- if (EHObj)
- ActionArgs.push_back(EHObj);
- else
- ActionArgs.push_back(ConstantPointerNull::get(Int8PtrType));
+ if (EHObj && !isa<ConstantPointerNull>(EHObj)) {
+ auto I = FrameVarInfo.find(EHObj);
+ assert(I != FrameVarInfo.end() &&
+ "failed to map llvm.eh.begincatch var");
+ FrameEscapeIdx = std::distance(FrameVarInfo.begin(), I);
+ }
+ ActionArgs.push_back(ConstantInt::get(Int32Type, FrameEscapeIdx));
} else {
ActionArgs.push_back(ConstantInt::get(Int32Type, 0));
}
@@ -502,6 +493,15 @@ bool WinEHPrepare::prepareExceptionHandlers(
if (!HandlersOutlined)
return false;
+ // Replace any nested landing pad stubs with the correct action handler.
+ // This must be done before we remove unreachable blocks because it
+ // cleans up references to outlined blocks that will be deleted.
+ for (auto &LPadPair : NestedLPtoOriginalLP)
+ completeNestedLandingPad(&F, LPadPair.first, LPadPair.second, FrameVarInfo);
+ NestedLPtoOriginalLP.clear();
+
+ F.addFnAttr("wineh-parent", F.getName());
+
// Delete any blocks that were only used by handlers that were outlined above.
removeUnreachableBlocks(F);
@@ -554,26 +554,26 @@ bool WinEHPrepare::prepareExceptionHandlers(
++InsertPt;
ParentAlloca =
new AllocaInst(ParentInst->getType(), nullptr,
- ParentInst->getName() + ".reg2mem", InsertPt);
+ ParentInst->getName() + ".reg2mem",
+ AllocaInsertPt);
new StoreInst(ParentInst, ParentAlloca, InsertPt);
} else {
- ParentAlloca = DemoteRegToStack(*ParentInst, true, ParentInst);
+ ParentAlloca = DemoteRegToStack(*ParentInst, true, AllocaInsertPt);
}
}
}
- // If the parent alloca is no longer used and only one of the handlers used
- // it, erase the parent and leave the copy in the outlined handler.
- if (ParentAlloca->getNumUses() == 0 && Allocas.size() == 1) {
- ParentAlloca->eraseFromParent();
- continue;
- }
+ // FIXME: We should try to sink unescaped allocas from the parent frame into
+ // the child frame. If the alloca is escaped, we have to use the lifetime
+ // markers to ensure that the alloca is only live within the child frame.
// Add this alloca to the list of things to escape.
AllocasToEscape.push_back(ParentAlloca);
// Next replace all outlined allocas that are mapped to it.
for (AllocaInst *TempAlloca : Allocas) {
+ if (TempAlloca == getCatchObjectSentinel())
+ continue; // Skip catch parameter sentinels.
Function *HandlerFn = TempAlloca->getParent()->getParent();
// FIXME: Sink this GEP into the blocks where it is used.
Builder.SetInsertPoint(TempAlloca);
@@ -601,19 +601,6 @@ bool WinEHPrepare::prepareExceptionHandlers(
Builder.SetInsertPoint(&F.getEntryBlock().back());
Builder.CreateCall(FrameEscapeFn, AllocasToEscape);
- // Insert an alloca for the EH state in the entry block. On x86, we will also
- // insert stores to update the EH state, but on other ISAs, the runtime does
- // it for us.
- // FIXME: This record is different on x86.
- Type *UnwindHelpTy = Type::getInt64Ty(Context);
- AllocaInst *UnwindHelp =
- new AllocaInst(UnwindHelpTy, "unwindhelp", &F.getEntryBlock().front());
- Builder.CreateStore(llvm::ConstantInt::get(UnwindHelpTy, -2), UnwindHelp);
- Function *UnwindHelpFn =
- Intrinsic::getDeclaration(M, Intrinsic::eh_unwindhelp);
- Builder.CreateCall(UnwindHelpFn,
- Builder.CreateBitCast(UnwindHelp, Int8PtrType));
-
// Clean up the handler action maps we created for this function
DeleteContainerSeconds(CatchHandlerMap);
CatchHandlerMap.clear();
@@ -623,6 +610,125 @@ bool WinEHPrepare::prepareExceptionHandlers(
return HandlersOutlined;
}
+void WinEHPrepare::promoteLandingPadValues(LandingPadInst *LPad) {
+ // If the return values of the landing pad instruction are extracted and
+ // stored to memory, we want to promote the store locations to reg values.
+ SmallVector<AllocaInst *, 2> EHAllocas;
+
+ // The landingpad instruction returns an aggregate value. Typically, its
+ // value will be passed to a pair of extract value instructions and the
+ // results of those extracts are often passed to store instructions.
+ // In unoptimized code the stored value will often be loaded and then stored
+ // again.
+ for (auto *U : LPad->users()) {
+ ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U);
+ if (!Extract)
+ continue;
+
+ for (auto *EU : Extract->users()) {
+ if (auto *Store = dyn_cast<StoreInst>(EU)) {
+ auto *AV = cast<AllocaInst>(Store->getPointerOperand());
+ EHAllocas.push_back(AV);
+ }
+ }
+ }
+
+ // We can't do this without a dominator tree.
+ assert(DT);
+
+ if (!EHAllocas.empty()) {
+ PromoteMemToReg(EHAllocas, *DT);
+ EHAllocas.clear();
+ }
+
+ // After promotion, some extracts may be trivially dead. Remove them.
+ SmallVector<Value *, 4> Users(LPad->user_begin(), LPad->user_end());
+ for (auto *U : Users)
+ RecursivelyDeleteTriviallyDeadInstructions(U);
+}
+
+void WinEHPrepare::completeNestedLandingPad(Function *ParentFn,
+ LandingPadInst *OutlinedLPad,
+ const LandingPadInst *OriginalLPad,
+ FrameVarInfoMap &FrameVarInfo) {
+ // Get the nested block and erase the unreachable instruction that was
+ // temporarily inserted as its terminator.
+ LLVMContext &Context = ParentFn->getContext();
+ BasicBlock *OutlinedBB = OutlinedLPad->getParent();
+ assert(isa<UnreachableInst>(OutlinedBB->getTerminator()));
+ OutlinedBB->getTerminator()->eraseFromParent();
+ // That should leave OutlinedLPad as the last instruction in its block.
+ assert(&OutlinedBB->back() == OutlinedLPad);
+
+ // The original landing pad will have already had its action intrinsic
+ // built by the outlining loop. We need to clone that into the outlined
+ // location. It may also be necessary to add references to the exception
+ // variables to the outlined handler in which this landing pad is nested
+ // and remap return instructions in the nested handlers that should return
+ // to an address in the outlined handler.
+ Function *OutlinedHandlerFn = OutlinedBB->getParent();
+ BasicBlock::const_iterator II = OriginalLPad;
+ ++II;
+ // The instruction after the landing pad should now be a call to eh.actions.
+ const Instruction *Recover = II;
+ assert(match(Recover, m_Intrinsic<Intrinsic::eh_actions>()));
+ IntrinsicInst *EHActions = cast<IntrinsicInst>(Recover->clone());
+
+ // Remap the exception variables into the outlined function.
+ WinEHFrameVariableMaterializer Materializer(OutlinedHandlerFn, FrameVarInfo);
+ SmallVector<BlockAddress *, 4> ActionTargets;
+ SmallVector<ActionHandler *, 4> ActionList;
+ parseEHActions(EHActions, ActionList);
+ for (auto *Action : ActionList) {
+ auto *Catch = dyn_cast<CatchHandler>(Action);
+ if (!Catch)
+ continue;
+ // The dyn_cast to function here selects C++ catch handlers and skips
+ // SEH catch handlers.
+ auto *Handler = dyn_cast<Function>(Catch->getHandlerBlockOrFunc());
+ if (!Handler)
+ continue;
+ // Visit all the return instructions, looking for places that return
+ // to a location within OutlinedHandlerFn.
+ for (BasicBlock &NestedHandlerBB : *Handler) {
+ auto *Ret = dyn_cast<ReturnInst>(NestedHandlerBB.getTerminator());
+ if (!Ret)
+ continue;
+
+ // Handler functions must always return a block address.
+ BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue());
+ // The original target will have been in the main parent function,
+ // but if it is the address of a block that has been outlined, it
+ // should be a block that was outlined into OutlinedHandlerFn.
+ assert(BA->getFunction() == ParentFn);
+
+ // Ignore targets that aren't part of OutlinedHandlerFn.
+ if (!LPadTargetBlocks.count(BA->getBasicBlock()))
+ continue;
+
+ // If the return value is the address of a block that we
+ // previously outlined into the parent handler function, replace
+ // the return instruction and add the mapped target to the list
+ // of possible return addresses.
+ BasicBlock *MappedBB = LPadTargetBlocks[BA->getBasicBlock()];
+ assert(MappedBB->getParent() == OutlinedHandlerFn);
+ BlockAddress *NewBA = BlockAddress::get(OutlinedHandlerFn, MappedBB);
+ Ret->eraseFromParent();
+ ReturnInst::Create(Context, NewBA, &NestedHandlerBB);
+ ActionTargets.push_back(NewBA);
+ }
+ }
+ DeleteContainerPointers(ActionList);
+ ActionList.clear();
+ OutlinedBB->getInstList().push_back(EHActions);
+
+ // Insert an indirect branch into the outlined landing pad BB.
+ IndirectBrInst *IBr = IndirectBrInst::Create(EHActions, 0, OutlinedBB);
+ // Add the previously collected action targets.
+ for (auto *Target : ActionTargets)
+ IBr->addDestination(Target->getBasicBlock());
+}
+
// This function examines a block to determine whether the block ends with a
// conditional branch to a catch handler based on a selector comparison.
// This function is used both by the WinEHPrepare::findSelectorComparison() and
@@ -657,6 +763,59 @@ static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
return false;
}
+static BasicBlock *createStubLandingPad(Function *Handler,
+ Value *PersonalityFn) {
+ // FIXME: Finish this!
+ LLVMContext &Context = Handler->getContext();
+ BasicBlock *StubBB = BasicBlock::Create(Context, "stub");
+ Handler->getBasicBlockList().push_back(StubBB);
+ IRBuilder<> Builder(StubBB);
+ LandingPadInst *LPad = Builder.CreateLandingPad(
+ llvm::StructType::get(Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(Context), nullptr),
+ PersonalityFn, 0);
+ LPad->setCleanup(true);
+ Builder.CreateUnreachable();
+ return StubBB;
+}
+
+// Cycles through the blocks in an outlined handler function looking for an
+// invoke instruction and inserts an invoke of llvm.donothing with an empty
+// landing pad if none is found. The code that generates the .xdata tables for
+// the handler needs at least one landing pad to identify the parent function's
+// personality.
+void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler,
+ Value *PersonalityFn) {
+ ReturnInst *Ret = nullptr;
+ for (BasicBlock &BB : *Handler) {
+ TerminatorInst *Terminator = BB.getTerminator();
+ // If we find an invoke, there is nothing to be done.
+ auto *II = dyn_cast<InvokeInst>(Terminator);
+ if (II)
+ return;
+ // If we've already recorded a return instruction, keep looking for invokes.
+ if (Ret)
+ continue;
+ // If we haven't recorded a return instruction yet, try this terminator.
+ Ret = dyn_cast<ReturnInst>(Terminator);
+ }
+
+ // If we got this far, the handler contains no invokes. We should have seen
+ // at least one return. We'll insert an invoke of llvm.donothing ahead of
+ // that return.
+ assert(Ret);
+ BasicBlock *OldRetBB = Ret->getParent();
+ BasicBlock *NewRetBB = SplitBlock(OldRetBB, Ret);
+ // SplitBlock adds an unconditional branch instruction at the end of the
+ // parent block. We want to replace that with an invoke call, so we can
+ // erase it now.
+ OldRetBB->getTerminator()->eraseFromParent();
+ BasicBlock *StubLandingPad = createStubLandingPad(Handler, PersonalityFn);
+ Function *F =
+ Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::donothing);
+ InvokeInst::Create(F, NewRetBB, StubLandingPad, None, "", OldRetBB);
+}
+
bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
LandingPadInst *LPad, BasicBlock *StartBB,
FrameVarInfoMap &VarInfo) {
@@ -680,6 +839,8 @@ bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
SrcFn->getName() + ".cleanup", M);
}
+ Handler->addFnAttr("wineh-parent", SrcFn->getName());
+
// Generate a standard prolog to setup the frame recovery structure.
IRBuilder<> Builder(Context);
BasicBlock *Entry = BasicBlock::Create(Context, "entry");
@@ -696,10 +857,14 @@ bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
LPadMap.mapLandingPad(LPad);
if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
Constant *Sel = CatchAction->getSelector();
- Director.reset(new WinEHCatchDirector(Handler, Sel, VarInfo, LPadMap));
- LPadMap.remapSelector(VMap, ConstantInt::get(Type::getInt32Ty(Context), 1));
+ Director.reset(new WinEHCatchDirector(Handler, Sel, VarInfo, LPadMap,
+ NestedLPtoOriginalLP));
+ LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType),
+ ConstantInt::get(Type::getInt32Ty(Context), 1));
} else {
Director.reset(new WinEHCleanupDirector(Handler, VarInfo, LPadMap));
+ LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType),
+ UndefValue::get(Type::getInt32Ty(Context)));
}
SmallVector<ReturnInst *, 8> Returns;
@@ -735,12 +900,45 @@ bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
Entry->getInstList().splice(Entry->end(), FirstClonedBB->getInstList());
FirstClonedBB->eraseFromParent();
+ // Make sure we can identify the handler's personality later.
+ addStubInvokeToHandlerIfNeeded(Handler, LPad->getPersonalityFn());
+
if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
WinEHCatchDirector *CatchDirector =
reinterpret_cast<WinEHCatchDirector *>(Director.get());
CatchAction->setExceptionVar(CatchDirector->getExceptionVar());
CatchAction->setReturnTargets(CatchDirector->getReturnTargets());
- }
+
+ // Look for blocks that are not part of the landing pad that we just
+ // outlined but terminate with a call to llvm.eh.endcatch and a
+ // branch to a block that is in the handler we just outlined.
+ // These blocks will be part of a nested landing pad that intends to
+ // return to an address in this handler. This case is best handled
+ // after both landing pads have been outlined, so for now we'll just
+ // save the association of the blocks in LPadTargetBlocks. The
+ // return instructions which are created from these branches will be
+ // replaced after all landing pads have been outlined.
+ for (const auto MapEntry : VMap) {
+ // VMap maps all values and blocks that were just cloned, but dead
+ // blocks which were pruned will map to nullptr.
+ if (!isa<BasicBlock>(MapEntry.first) || MapEntry.second == nullptr)
+ continue;
+ const BasicBlock *MappedBB = cast<BasicBlock>(MapEntry.first);
+ for (auto *Pred : predecessors(const_cast<BasicBlock *>(MappedBB))) {
+ auto *Branch = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!Branch || !Branch->isUnconditional() || Pred->size() <= 1)
+ continue;
+ BasicBlock::iterator II = const_cast<BranchInst *>(Branch);
+ --II;
+ if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_endcatch>())) {
+ // This would indicate that a nested landing pad wants to return
+ // to a block that is outlined into two different handlers.
+ assert(!LPadTargetBlocks.count(MappedBB));
+ LPadTargetBlocks[MappedBB] = cast<BasicBlock>(MapEntry.second);
+ }
+ }
+ }
+ } // End if (CatchAction)
Action->setHandlerBlockOrFunc(Handler);
@@ -787,9 +985,8 @@ void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) {
// The landingpad instruction returns an aggregate value. Typically, its
// value will be passed to a pair of extract value instructions and the
- // results of those extracts are often passed to store instructions.
- // In unoptimized code the stored value will often be loaded and then stored
- // again.
+ // results of those extracts will have been promoted to reg values before
+ // this routine is called.
for (auto *U : LPad->users()) {
const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U);
if (!Extract)
@@ -800,36 +997,17 @@ void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) {
assert((Idx == 0 || Idx == 1) &&
"Unexpected operation: extracting an unknown landing pad element");
if (Idx == 0) {
- // Element 0 doesn't directly corresponds to anything in the WinEH
- // scheme.
- // It will be stored to a memory location, then later loaded and finally
- // the loaded value will be used as the argument to an
- // llvm.eh.begincatch
- // call. We're tracking it here so that we can skip the store and load.
ExtractedEHPtrs.push_back(Extract);
} else if (Idx == 1) {
- // Element 1 corresponds to the filter selector. We'll map it to 1 for
- // matching purposes, but it will also probably be stored to memory and
- // reloaded, so we need to track the instuction so that we can map the
- // loaded value too.
ExtractedSelectors.push_back(Extract);
}
-
- // Look for stores of the extracted values.
- for (auto *EU : Extract->users()) {
- if (auto *Store = dyn_cast<StoreInst>(EU)) {
- if (Idx == 1) {
- SelectorStores.push_back(Store);
- SelectorStoreAddrs.push_back(Store->getPointerOperand());
- } else {
- EHPtrStores.push_back(Store);
- EHPtrStoreAddrs.push_back(Store->getPointerOperand());
- }
- }
- }
}
}
+bool LandingPadMap::isOriginLandingPadBlock(const BasicBlock *BB) const {
+ return BB->getLandingPadInst() == OriginLPad;
+}
+
bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const {
if (Inst == OriginLPad)
return true;
@@ -841,47 +1019,16 @@ bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const {
if (Inst == Extract)
return true;
}
- for (auto *Store : EHPtrStores) {
- if (Inst == Store)
- return true;
- }
- for (auto *Store : SelectorStores) {
- if (Inst == Store)
- return true;
- }
-
return false;
}
-void LandingPadMap::remapSelector(ValueToValueMapTy &VMap,
- Value *MappedValue) const {
- // Remap all selector extract instructions to the specified value.
+void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue,
+ Value *SelectorValue) const {
+ // Remap all landing pad extract instructions to the specified values.
+ for (auto *Extract : ExtractedEHPtrs)
+ VMap[Extract] = EHPtrValue;
for (auto *Extract : ExtractedSelectors)
- VMap[Extract] = MappedValue;
-}
-
-bool LandingPadMap::mapIfEHLoad(const LoadInst *Load,
- SmallVectorImpl<const StoreInst *> &Stores,
- SmallVectorImpl<const Value *> &StoreAddrs) {
- // This makes the assumption that a store we've previously seen dominates
- // this load instruction. That might seem like a rather huge assumption,
- // but given the way that landingpads are constructed its fairly safe.
- // FIXME: Add debug/assert code that verifies this.
- const Value *LoadAddr = Load->getPointerOperand();
- for (auto *StoreAddr : StoreAddrs) {
- if (LoadAddr == StoreAddr) {
- // Handle the common debug scenario where this loaded value is stored
- // to a different location.
- for (auto *U : Load->users()) {
- if (auto *Store = dyn_cast<StoreInst>(U)) {
- Stores.push_back(Store);
- StoreAddrs.push_back(Store->getPointerOperand());
- }
- }
- return true;
- }
- }
- return false;
+ VMap[Extract] = SelectorValue;
}
CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction(
@@ -891,40 +1038,13 @@ CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction(
if (LPadMap.isLandingPadSpecificInst(Inst))
return CloningDirector::SkipInstruction;
- if (auto *Load = dyn_cast<LoadInst>(Inst)) {
- // Look for loads of (previously suppressed) landingpad values.
- // The EHPtr load can be mapped to an undef value as it should only be used
- // as an argument to llvm.eh.begincatch, but the selector value needs to be
- // mapped to a constant value of 1. This value will be used to simplify the
- // branching to always flow to the current handler.
- if (LPadMap.mapIfSelectorLoad(Load)) {
- VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
- return CloningDirector::SkipInstruction;
- }
- if (LPadMap.mapIfEHPtrLoad(Load)) {
- VMap[Inst] = UndefValue::get(Int8PtrType);
- return CloningDirector::SkipInstruction;
- }
-
- // Any other loads just get cloned.
- return CloningDirector::CloneInstruction;
- }
-
// Nested landing pads will be cloned as stubs, with just the
// landingpad instruction and an unreachable instruction. When
// all landingpads have been outlined, we'll replace this with the
// llvm.eh.actions call and indirect branch created when the
// landing pad was outlined.
- if (auto *NestedLPad = dyn_cast<LandingPadInst>(Inst)) {
- Instruction *NewInst = NestedLPad->clone();
- if (NestedLPad->hasName())
- NewInst->setName(NestedLPad->getName());
- // FIXME: Store this mapping somewhere else also.
- VMap[NestedLPad] = NewInst;
- BasicBlock::InstListType &InstList = NewBB->getInstList();
- InstList.push_back(NewInst);
- InstList.push_back(new UnreachableInst(NewBB->getContext()));
- return CloningDirector::StopCloningBB;
+ if (auto *LPad = dyn_cast<LandingPadInst>(Inst)) {
+ return handleLandingPad(VMap, LPad, NewBB);
}
if (auto *Invoke = dyn_cast<InvokeInst>(Inst))
@@ -944,6 +1064,20 @@ CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction(
return CloningDirector::CloneInstruction;
}
+CloningDirector::CloningAction WinEHCatchDirector::handleLandingPad(
+ ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) {
+ Instruction *NewInst = LPad->clone();
+ if (LPad->hasName())
+ NewInst->setName(LPad->getName());
+ // Save this correlation for later processing.
+ NestedLPtoOriginalLP[cast<LandingPadInst>(NewInst)] = LPad;
+ VMap[LPad] = NewInst;
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(NewInst);
+ InstList.push_back(new UnreachableInst(NewBB->getContext()));
+ return CloningDirector::StopCloningBB;
+}
+
CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch(
ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
// The argument to the call is some form of the first element of the
@@ -958,6 +1092,11 @@ CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch(
"llvm.eh.begincatch found while "
"outlining catch handler.");
ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(ExceptionObjectVar))
+ return CloningDirector::SkipInstruction;
+ assert(cast<AllocaInst>(ExceptionObjectVar)->isStaticAlloca() &&
+ "catch parameter is not static alloca");
+ Materializer.escapeCatchObject(ExceptionObjectVar);
return CloningDirector::SkipInstruction;
}
@@ -971,27 +1110,32 @@ WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap,
// The end catch call can occur in one of two places: either in a
// landingpad block that is part of the catch handlers exception mechanism,
- // or at the end of the catch block. If it occurs in a landing pad, we must
- // skip it and continue so that the landing pad gets cloned.
- // FIXME: This case isn't fully supported yet and shouldn't turn up in any
- // of the test cases until it is.
- if (IntrinCall->getParent()->isLandingPad())
+ // or at the end of the catch block. However, a catch-all handler may call
+ // end catch from the original landing pad. If the call occurs in a nested
+ // landing pad block, we must skip it and continue so that the landing pad
+ // gets cloned.
+ auto *ParentBB = IntrinCall->getParent();
+ if (ParentBB->isLandingPad() && !LPadMap.isOriginLandingPadBlock(ParentBB))
return CloningDirector::SkipInstruction;
- // If an end catch occurs anywhere else the next instruction should be an
- // unconditional branch instruction that we want to replace with a return
- // to the the address of the branch target.
- const BasicBlock *EndCatchBB = IntrinCall->getParent();
- const TerminatorInst *Terminator = EndCatchBB->getTerminator();
- const BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
- assert(Branch && Branch->isUnconditional());
- assert(std::next(BasicBlock::const_iterator(IntrinCall)) ==
- BasicBlock::const_iterator(Branch));
-
- BasicBlock *ContinueLabel = Branch->getSuccessor(0);
- ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueLabel),
- NewBB);
- ReturnTargets.push_back(ContinueLabel);
+ // If an end catch occurs anywhere else we want to terminate the handler
+ // with a return to the code that follows the endcatch call. If the
+ // next instruction is not an unconditional branch, we need to split the
+ // block to provide a clear target for the return instruction.
+ BasicBlock *ContinueBB;
+ auto Next = std::next(BasicBlock::const_iterator(IntrinCall));
+ const BranchInst *Branch = dyn_cast<BranchInst>(Next);
+ if (!Branch || !Branch->isUnconditional()) {
+ // We're interrupting the cloning process at this location, so the
+ // const_cast we're doing here will not cause a problem.
+ ContinueBB = SplitBlock(const_cast<BasicBlock *>(ParentBB),
+ const_cast<Instruction *>(cast<Instruction>(Next)));
+ } else {
+ ContinueBB = Branch->getSuccessor(0);
+ }
+
+ ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueBB), NewBB);
+ ReturnTargets.push_back(ContinueBB);
// We just added a terminator to the cloned block.
// Tell the caller to stop processing the current basic block so that
@@ -1029,6 +1173,20 @@ WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap,
return CloningDirector::StopCloningBB;
}
+CloningDirector::CloningAction WinEHCleanupDirector::handleLandingPad(
+ ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) {
+ // The MS runtime will terminate the process if an exception occurs in a
+ // cleanup handler, so we shouldn't encounter landing pads in the actual
+ // cleanup code, but they may appear in catch blocks. Depending on where
+ // we started cloning we may see one, but it will get dropped during dead
+ // block pruning.
+ Instruction *NewInst = new UnreachableInst(NewBB->getContext());
+ VMap[LPad] = NewInst;
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(NewInst);
+ return CloningDirector::StopCloningBB;
+}
+
CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch(
ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
// Catch blocks within cleanup handlers will always be unreachable.
@@ -1041,12 +1199,9 @@ CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch(
CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch(
ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // Catch blocks within cleanup handlers will always be unreachable.
- // We'll insert an unreachable instruction now, but it will be pruned
- // before the cloning process is complete.
- BasicBlock::InstListType &InstList = NewBB->getInstList();
- InstList.push_back(new UnreachableInst(NewBB->getContext()));
- return CloningDirector::StopCloningBB;
+ // Cleanup handlers nested within catch handlers may begin with a call to
+ // eh.endcatch. We can just ignore that instruction.
+ return CloningDirector::SkipInstruction;
}
CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor(
@@ -1080,6 +1235,9 @@ CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke(
NewCall->setDebugLoc(Invoke->getDebugLoc());
VMap[Invoke] = NewCall;
+ // Remap the operands.
+ llvm::RemapInstruction(NewCall, VMap, RF_None, nullptr, &Materializer);
+
// Insert an unconditional branch to the normal destination.
BranchInst::Create(Invoke->getNormalDest(), NewBB);
@@ -1088,7 +1246,7 @@ CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke(
// We just added a terminator to the cloned block.
// Tell the caller to stop processing the current basic block.
- return CloningDirector::StopCloningBB;
+ return CloningDirector::CloneSuccessors;
}
CloningDirector::CloningAction WinEHCleanupDirector::handleResume(
@@ -1104,7 +1262,8 @@ CloningDirector::CloningAction WinEHCleanupDirector::handleResume(
WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer(
Function *OutlinedFn, FrameVarInfoMap &FrameVarInfo)
: FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) {
- Builder.SetInsertPoint(&OutlinedFn->getEntryBlock());
+ BasicBlock *EntryBB = &OutlinedFn->getEntryBlock();
+ Builder.SetInsertPoint(EntryBB, EntryBB->getFirstInsertionPt());
}
Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
@@ -1139,6 +1298,15 @@ Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
return nullptr;
}
+void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) {
+ // Catch parameter objects have to live in the parent frame. When we see a use
+ // of a catch parameter, add a sentinel to the multimap to indicate that it's
+ // used from another handler. This will prevent us from trying to sink the
+ // alloca into the handler and ensure that the catch parameter is present in
+ // the call to llvm.frameescape.
+ FrameVarInfo[V].push_back(getCatchObjectSentinel());
+}
+
// This function maps the catch and cleanup handlers that are reachable from the
// specified landing pad. The landing pad sequence will have this basic shape:
//
@@ -1176,13 +1344,7 @@ void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad,
DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n");
if (NumClauses == 0) {
- // This landing pad contains only cleanup code.
- CleanupHandler *Action = new CleanupHandler(BB);
- CleanupHandlerMap[BB] = Action;
- Actions.insertCleanupHandler(Action);
- DEBUG(dbgs() << " Assuming cleanup code in block " << BB->getName()
- << "\n");
- assert(LPad->isCleanup());
+ findCleanupHandlers(Actions, BB, nullptr);
return;
}
@@ -1202,14 +1364,8 @@ void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad,
// exceptions but code called from catches can. For SEH, it isn't
// important if some finally code before a catch-all is executed out of
// line or after recovering from the exception.
- if (Personality == EHPersonality::MSVC_CXX) {
- if (auto *CleanupAction = findCleanupHandler(BB, BB)) {
- // Add a cleanup entry to the list
- Actions.insertCleanupHandler(CleanupAction);
- DEBUG(dbgs() << " Found cleanup code in block "
- << CleanupAction->getStartBlock()->getName() << "\n");
- }
- }
+ if (Personality == EHPersonality::MSVC_CXX)
+ findCleanupHandlers(Actions, BB, BB);
// Add the catch handler to the action list.
CatchHandler *Action =
@@ -1226,13 +1382,7 @@ void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad,
CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks);
// See if there is any interesting code executed before the dispatch.
- if (auto *CleanupAction =
- findCleanupHandler(BB, CatchAction->getStartBlock())) {
- // Add a cleanup entry to the list
- Actions.insertCleanupHandler(CleanupAction);
- DEBUG(dbgs() << " Found cleanup code in block "
- << CleanupAction->getStartBlock()->getName() << "\n");
- }
+ findCleanupHandlers(Actions, BB, CatchAction->getStartBlock());
assert(CatchAction);
++HandlersFound;
@@ -1248,12 +1398,7 @@ void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad,
// If we didn't wind up in a catch-all, see if there is any interesting code
// executed before the resume.
- if (auto *CleanupAction = findCleanupHandler(BB, BB)) {
- // Add a cleanup entry to the list
- Actions.insertCleanupHandler(CleanupAction);
- DEBUG(dbgs() << " Found cleanup code in block "
- << CleanupAction->getStartBlock()->getName() << "\n");
- }
+ findCleanupHandlers(Actions, BB, BB);
// It's possible that some optimization moved code into a landingpad that
// wasn't
@@ -1313,20 +1458,56 @@ CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB,
return nullptr;
}
-// These are helper functions to combine repeated code from findCleanupHandler.
-static CleanupHandler *createCleanupHandler(CleanupHandlerMapTy &CleanupHandlerMap,
- BasicBlock *BB) {
+// These are helper functions to combine repeated code from findCleanupHandlers.
+static void createCleanupHandler(LandingPadActions &Actions,
+ CleanupHandlerMapTy &CleanupHandlerMap,
+ BasicBlock *BB) {
CleanupHandler *Action = new CleanupHandler(BB);
CleanupHandlerMap[BB] = Action;
- return Action;
+ Actions.insertCleanupHandler(Action);
+ DEBUG(dbgs() << " Found cleanup code in block "
+ << Action->getStartBlock()->getName() << "\n");
+}
+
+static bool isFrameAddressCall(Value *V) {
+ return match(V, m_Intrinsic<Intrinsic::frameaddress>(m_SpecificInt(0)));
+}
+
+static CallSite matchOutlinedFinallyCall(BasicBlock *BB,
+ Instruction *MaybeCall) {
+ // Look for finally blocks that Clang has already outlined for us.
+ // %fp = call i8* @llvm.frameaddress(i32 0)
+ // call void @"fin$parent"(iN 1, i8* %fp)
+ if (isFrameAddressCall(MaybeCall) && MaybeCall != BB->getTerminator())
+ MaybeCall = MaybeCall->getNextNode();
+ CallSite FinallyCall(MaybeCall);
+ if (!FinallyCall || FinallyCall.arg_size() != 2)
+ return CallSite();
+ if (!match(FinallyCall.getArgument(0), m_SpecificInt(1)))
+ return CallSite();
+ if (!isFrameAddressCall(FinallyCall.getArgument(1)))
+ return CallSite();
+ return FinallyCall;
+}
+
+static BasicBlock *followSingleUnconditionalBranches(BasicBlock *BB) {
+  // Skip blocks that contain only a single unconditional branch.
+ while (BB->getFirstNonPHIOrDbg() == BB->getTerminator()) {
+ auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
+ if (Br && Br->isUnconditional())
+ BB = Br->getSuccessor(0);
+ else
+ return BB;
+ }
+ return BB;
}
// This function searches starting with the input block for the next block that
// contains code that is not part of a catch handler and would not be eliminated
// during handler outlining.
//
-CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB,
- BasicBlock *EndBB) {
+void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions,
+ BasicBlock *StartBB, BasicBlock *EndBB) {
// Here we will skip over the following:
//
// landing pad prolog:
@@ -1343,6 +1524,7 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB,
// Anything other than an unconditional branch will kick us out of this loop
// one way or another.
while (BB) {
+ BB = followSingleUnconditionalBranches(BB);
// If we've already scanned this block, don't scan it again. If it is
// a cleanup block, there will be an action in the CleanupHandlerMap.
// If we've scanned it and it is not a cleanup block, there will be a
@@ -1351,7 +1533,12 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB,
// avoid creating a null entry for blocks we haven't scanned.
if (CleanupHandlerMap.count(BB)) {
if (auto *Action = CleanupHandlerMap[BB]) {
- return cast<CleanupHandler>(Action);
+ Actions.insertCleanupHandler(Action);
+ DEBUG(dbgs() << " Found cleanup code in block "
+ << Action->getStartBlock()->getName() << "\n");
+ // FIXME: This cleanup might chain into another, and we need to discover
+ // that.
+ return;
} else {
// Here we handle the case where the cleanup handler map contains a
// value for this block but the value is a nullptr. This means that
@@ -1363,11 +1550,9 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB,
// would terminate the search for cleanup code, so the unconditional
// branch is the only case for which we might need to continue
// searching.
- if (BB == EndBB)
- return nullptr;
- BasicBlock *SuccBB;
- if (!match(BB->getTerminator(), m_UnconditionalBr(SuccBB)))
- return nullptr;
+ BasicBlock *SuccBB = followSingleUnconditionalBranches(BB);
+ if (SuccBB == BB || SuccBB == EndBB)
+ return;
BB = SuccBB;
continue;
}
@@ -1390,26 +1575,23 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB,
}
// Look for the bare resume pattern:
- // %exn2 = load i8** %exn.slot
- // %sel2 = load i32* %ehselector.slot
- // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn2, 0
- // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel2, 1
+ // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn, 0
+ // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel, 1
// resume { i8*, i32 } %lpad.val2
if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) {
InsertValueInst *Insert1 = nullptr;
InsertValueInst *Insert2 = nullptr;
Value *ResumeVal = Resume->getOperand(0);
- // If there is only one landingpad, we may use the lpad directly with no
- // insertions.
- if (isa<LandingPadInst>(ResumeVal))
- return nullptr;
- if (!isa<PHINode>(ResumeVal)) {
+ // If the resume value isn't a phi or landingpad value, it should be a
+ // series of insertions. Identify them so we can avoid them when scanning
+ // for cleanups.
+ if (!isa<PHINode>(ResumeVal) && !isa<LandingPadInst>(ResumeVal)) {
Insert2 = dyn_cast<InsertValueInst>(ResumeVal);
if (!Insert2)
- return createCleanupHandler(CleanupHandlerMap, BB);
+ return createCleanupHandler(Actions, CleanupHandlerMap, BB);
Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand());
if (!Insert1)
- return createCleanupHandler(CleanupHandlerMap, BB);
+ return createCleanupHandler(Actions, CleanupHandlerMap, BB);
}
for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
II != IE; ++II) {
@@ -1420,66 +1602,133 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB,
continue;
if (!Inst->hasOneUse() ||
(Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) {
- return createCleanupHandler(CleanupHandlerMap, BB);
+ return createCleanupHandler(Actions, CleanupHandlerMap, BB);
}
}
- return nullptr;
+ return;
}
BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
- if (Branch) {
- if (Branch->isConditional()) {
- // Look for the selector dispatch.
- // %sel = load i32* %ehselector.slot
- // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*))
- // %matches = icmp eq i32 %sel12, %2
- // br i1 %matches, label %catch14, label %eh.resume
- CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition());
- if (!Compare || !Compare->isEquality())
- return createCleanupHandler(CleanupHandlerMap, BB);
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(),
- IE = BB->end();
- II != IE; ++II) {
- Instruction *Inst = II;
- if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
- continue;
- if (Inst == Compare || Inst == Branch)
- continue;
- if (!Inst->hasOneUse() || (Inst->user_back() != Compare))
- return createCleanupHandler(CleanupHandlerMap, BB);
- if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
- continue;
- if (!isa<LoadInst>(Inst))
- return createCleanupHandler(CleanupHandlerMap, BB);
- }
- // The selector dispatch block should always terminate our search.
- assert(BB == EndBB);
- return nullptr;
- } else {
- // Look for empty blocks with unconditional branches.
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(),
- IE = BB->end();
- II != IE; ++II) {
- Instruction *Inst = II;
- if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
- continue;
- if (Inst == Branch)
- continue;
- if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
- continue;
- // Anything else makes this interesting cleanup code.
- return createCleanupHandler(CleanupHandlerMap, BB);
+ if (Branch && Branch->isConditional()) {
+ // Look for the selector dispatch.
+ // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*))
+ // %matches = icmp eq i32 %sel, %2
+ // br i1 %matches, label %catch14, label %eh.resume
+ CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition());
+ if (!Compare || !Compare->isEquality())
+ return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II) {
+ Instruction *Inst = II;
+ if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+ continue;
+ if (Inst == Compare || Inst == Branch)
+ continue;
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+ continue;
+ return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+ }
+ // The selector dispatch block should always terminate our search.
+ assert(BB == EndBB);
+ return;
+ }
+
+ if (isAsynchronousEHPersonality(Personality)) {
+      // If this is a landingpad block, skip past the landing-pad-specific
+      // instructions to find the first candidate call.
+ Instruction *MaybeCall = BB->getFirstNonPHIOrDbg();
+ if (LPadMap) {
+ while (MaybeCall != BB->getTerminator() &&
+ LPadMap->isLandingPadSpecificInst(MaybeCall))
+ MaybeCall = MaybeCall->getNextNode();
+ }
+
+ // Look for outlined finally calls.
+ if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) {
+ Function *Fin = FinallyCall.getCalledFunction();
+ assert(Fin && "outlined finally call should be direct");
+ auto *Action = new CleanupHandler(BB);
+ Action->setHandlerBlockOrFunc(Fin);
+ Actions.insertCleanupHandler(Action);
+ CleanupHandlerMap[BB] = Action;
+ DEBUG(dbgs() << " Found frontend-outlined finally call to "
+ << Fin->getName() << " in block "
+ << Action->getStartBlock()->getName() << "\n");
+
+ // Split the block if there were more interesting instructions and look
+ // for finally calls in the normal successor block.
+ BasicBlock *SuccBB = BB;
+ if (FinallyCall.getInstruction() != BB->getTerminator() &&
+ FinallyCall.getInstruction()->getNextNode() != BB->getTerminator()) {
+ SuccBB = BB->splitBasicBlock(FinallyCall.getInstruction()->getNextNode());
+ } else {
+ if (FinallyCall.isInvoke()) {
+ SuccBB = cast<InvokeInst>(FinallyCall.getInstruction())->getNormalDest();
+ } else {
+ SuccBB = BB->getUniqueSuccessor();
+ assert(SuccBB && "splitOutlinedFinallyCalls didn't insert a branch");
+ }
}
+ BB = SuccBB;
if (BB == EndBB)
- return nullptr;
- // The branch was unconditional.
- BB = Branch->getSuccessor(0);
+ return;
+ continue;
+ }
+ }
+
+ // Anything else is either a catch block or interesting cleanup code.
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II) {
+ Instruction *Inst = II;
+ if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+ continue;
+ // Unconditional branches fall through to this loop.
+ if (Inst == Branch)
continue;
- } // End else of if branch was conditional
- } // End if Branch
+ // If this is a catch block, there is no cleanup code to be found.
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
+ return;
+      // If this is a nested landing pad, it may contain an endcatch call.
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
+ return;
+ // Anything else makes this interesting cleanup code.
+ return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+ }
- // Anything else makes this interesting cleanup code.
- return createCleanupHandler(CleanupHandlerMap, BB);
+ // Only unconditional branches in empty blocks should get this far.
+ assert(Branch && Branch->isUnconditional());
+ if (BB == EndBB)
+ return;
+ BB = Branch->getSuccessor(0);
}
- return nullptr;
+}
+
+// This is a public function, declared in WinEHFuncInfo.h, and is also
+// referenced by WinEHNumbering in FunctionLoweringInfo.cpp.
+void llvm::parseEHActions(const IntrinsicInst *II,
+ SmallVectorImpl<ActionHandler *> &Actions) {
+ for (unsigned I = 0, E = II->getNumArgOperands(); I != E;) {
+ uint64_t ActionKind =
+ cast<ConstantInt>(II->getArgOperand(I))->getZExtValue();
+ if (ActionKind == /*catch=*/1) {
+ auto *Selector = cast<Constant>(II->getArgOperand(I + 1));
+ ConstantInt *EHObjIndex = cast<ConstantInt>(II->getArgOperand(I + 2));
+ int64_t EHObjIndexVal = EHObjIndex->getSExtValue();
+ Constant *Handler = cast<Constant>(II->getArgOperand(I + 3));
+ I += 4;
+ auto *CH = new CatchHandler(/*BB=*/nullptr, Selector, /*NextBB=*/nullptr);
+ CH->setHandlerBlockOrFunc(Handler);
+ CH->setExceptionVarIndex(EHObjIndexVal);
+ Actions.push_back(CH);
+ } else if (ActionKind == 0) {
+ Constant *Handler = cast<Constant>(II->getArgOperand(I + 1));
+ I += 2;
+ auto *CH = new CleanupHandler(/*BB=*/nullptr);
+ CH->setHandlerBlockOrFunc(Handler);
+ Actions.push_back(CH);
+ } else {
+ llvm_unreachable("Expected either a catch or cleanup handler!");
+ }
+ }
+ std::reverse(Actions.begin(), Actions.end());
}
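
The argument list decoded above is the flat encoding produced by the outlining code: a catch entry occupies four operands (the constant 1, the type selector, the exception-object frame index, and the handler), while a cleanup entry occupies two (the constant 0 and the handler). As a rough sketch only, not part of this patch, a caller can consume the parsed list along the lines of the outlining loop earlier in this file; the helper name walkEHActions is hypothetical:

  // Illustrative sketch: decode an @llvm.eh.actions call site, visit each
  // handler, then release the parsed objects.
  static void walkEHActions(const IntrinsicInst *EHActions) {
    assert(match(EHActions, m_Intrinsic<Intrinsic::eh_actions>()));
    SmallVector<ActionHandler *, 4> ActionList;
    parseEHActions(EHActions, ActionList);
    for (ActionHandler *Action : ActionList) {
      if (auto *Catch = dyn_cast<CatchHandler>(Action)) {
        // Catch entries carry a selector and a handler (a block before
        // outlining, a function afterwards).
        (void)Catch->getSelector();
        (void)Catch->getHandlerBlockOrFunc();
      } else {
        // Everything else is a cleanup entry with only a handler.
        (void)cast<CleanupHandler>(Action)->getHandlerBlockOrFunc();
      }
    }
    // parseEHActions allocates the handlers with new; the caller owns them.
    DeleteContainerPointers(ActionList);
  }
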
diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 9f56214..7846529 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -197,8 +197,7 @@ public:
DataAlignmentFactor(DataAlignmentFactor),
ReturnAddressRegister(ReturnAddressRegister) {}
- ~CIE() {
- }
+ ~CIE() override {}
uint64_t getCodeAlignmentFactor() const { return CodeAlignmentFactor; }
int64_t getDataAlignmentFactor() const { return DataAlignmentFactor; }
@@ -245,8 +244,7 @@ public:
InitialLocation(InitialLocation), AddressRange(AddressRange),
LinkedCIE(Cie) {}
- ~FDE() {
- }
+ ~FDE() override {}
CIE *getLinkedCIE() const { return LinkedCIE; }
diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
index b14af07..0aff327 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -48,9 +48,10 @@ public:
reset();
}
- uint32_t getChildCount() const { return Args.size(); }
+ uint32_t getChildCount() const override { return Args.size(); }
- std::unique_ptr<PDBSymbolData> getChildAtIndex(uint32_t Index) const {
+ std::unique_ptr<PDBSymbolData>
+ getChildAtIndex(uint32_t Index) const override {
if (Index >= Args.size())
return nullptr;
@@ -58,7 +59,7 @@ public:
Args[Index]->getSymIndexId());
}
- std::unique_ptr<PDBSymbolData> getNext() {
+ std::unique_ptr<PDBSymbolData> getNext() override {
if (CurIter == Args.end())
return nullptr;
const auto &Result = **CurIter;
@@ -66,9 +67,9 @@ public:
return Session.getConcreteSymbolById<PDBSymbolData>(Result.getSymIndexId());
}
- void reset() { CurIter = Args.empty() ? Args.end() : Args.begin(); }
+ void reset() override { CurIter = Args.empty() ? Args.end() : Args.begin(); }
- FunctionArgEnumerator *clone() const {
+ FunctionArgEnumerator *clone() const override {
return new FunctionArgEnumerator(Session, Func);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
index 8018206..af3563f 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
@@ -34,25 +34,27 @@ public:
std::unique_ptr<ArgEnumeratorType> ArgEnumerator)
: Session(PDBSession), Enumerator(std::move(ArgEnumerator)) {}
- uint32_t getChildCount() const { return Enumerator->getChildCount(); }
+ uint32_t getChildCount() const override {
+ return Enumerator->getChildCount();
+ }
- std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const {
+ std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override {
auto FunctionArgSymbol = Enumerator->getChildAtIndex(Index);
if (!FunctionArgSymbol)
return nullptr;
return Session.getSymbolById(FunctionArgSymbol->getTypeId());
}
- std::unique_ptr<PDBSymbol> getNext() {
+ std::unique_ptr<PDBSymbol> getNext() override {
auto FunctionArgSymbol = Enumerator->getNext();
if (!FunctionArgSymbol)
return nullptr;
return Session.getSymbolById(FunctionArgSymbol->getTypeId());
}
- void reset() { Enumerator->reset(); }
+ void reset() override { Enumerator->reset(); }
- MyType *clone() const {
+ MyType *clone() const override {
std::unique_ptr<ArgEnumeratorType> Clone(Enumerator->clone());
return new FunctionArgEnumerator(Session, std::move(Clone));
}
diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h
deleted file mode 100644
index 6453099..0000000
--- a/lib/ExecutionEngine/EventListenerCommon.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===-- JIT.h - Abstract Execution Engine Interface -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Common functionality for JITEventListener implementations
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef EVENT_LISTENER_COMMON_H
-#define EVENT_LISTENER_COMMON_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/Path.h"
-
-namespace llvm {
-
-namespace jitprofiling {
-
-class FilenameCache {
- // Holds the filename of each Scope, so that we can pass a null-terminated
- // string into oprofile.
- DenseMap<const MDNode *, std::string> Filenames;
- DenseMap<const MDNode *, std::string> Paths;
-
- public:
- const char *getFilename(MDNode *Scope) {
- assert(Scope->isResolved() && "Expected Scope to be resolved");
- std::string &Filename = Filenames[Scope];
- if (Filename.empty()) {
- DIScope DIScope(Scope);
- Filename = DIScope.getFilename();
- }
- return Filename.c_str();
- }
-
- const char *getFullPath(MDNode *Scope) {
- assert(Scope->isResolved() && "Expected Scope to be resolved");
- std::string &P = Paths[Scope];
- if (P.empty()) {
- DIScope DIScope(Scope);
- StringRef DirName = DIScope.getDirectory();
- StringRef FileName = DIScope.getFilename();
- SmallString<256> FullPath;
- if (DirName != "." && DirName != "") {
- FullPath = DirName;
- }
- if (FileName != "") {
- sys::path::append(FullPath, FileName);
- }
- P = FullPath.str();
- }
- return P.c_str();
- }
-};
-
-} // namespace jitprofiling
-
-} // namespace llvm
-
-#endif //EVENT_LISTENER_COMMON_H
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index c586ba7..d7038fd 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -18,9 +18,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
@@ -45,11 +47,13 @@ STATISTIC(NumGlobals , "Number of global vars initialized");
ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
std::unique_ptr<Module> M, std::string *ErrorStr,
- std::unique_ptr<RTDyldMemoryManager> MCJMM,
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
std::unique_ptr<TargetMachine> TM) = nullptr;
ExecutionEngine *(*ExecutionEngine::OrcMCJITReplacementCtor)(
- std::string *ErrorStr, std::unique_ptr<RTDyldMemoryManager> OrcJMM,
+ std::string *ErrorStr, std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
std::unique_ptr<TargetMachine> TM) = nullptr;
ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr<Module> M,
@@ -58,8 +62,7 @@ ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr<Module> M,
void JITEventListener::anchor() {}
ExecutionEngine::ExecutionEngine(std::unique_ptr<Module> M)
- : EEState(*this),
- LazyFunctionCreator(nullptr) {
+ : LazyFunctionCreator(nullptr) {
CompilingLazily = false;
GVCompilationDisabled = false;
SymbolSearchingDisabled = false;
@@ -151,38 +154,52 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
}
-void *ExecutionEngineState::RemoveMapping(const GlobalValue *ToUnmap) {
- GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap);
- void *OldVal;
+uint64_t ExecutionEngineState::RemoveMapping(StringRef Name) {
+ GlobalAddressMapTy::iterator I = GlobalAddressMap.find(Name);
+ uint64_t OldVal;
// FIXME: This is silly, we shouldn't end up with a mapping -> 0 in the
// GlobalAddressMap.
if (I == GlobalAddressMap.end())
- OldVal = nullptr;
+ OldVal = 0;
else {
+ GlobalAddressReverseMap.erase(I->second);
OldVal = I->second;
GlobalAddressMap.erase(I);
}
- GlobalAddressReverseMap.erase(OldVal);
return OldVal;
}
+std::string ExecutionEngine::getMangledName(const GlobalValue *GV) {
+ MutexGuard locked(lock);
+ Mangler Mang(DL);
+ SmallString<128> FullName;
+ Mang.getNameWithPrefix(FullName, GV->getName());
+ return FullName.str();
+}
+
void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
MutexGuard locked(lock);
+ addGlobalMapping(getMangledName(GV), (uint64_t) Addr);
+}
+
+void ExecutionEngine::addGlobalMapping(StringRef Name, uint64_t Addr) {
+ MutexGuard locked(lock);
+
+ assert(!Name.empty() && "Empty GlobalMapping symbol name!");
- DEBUG(dbgs() << "JIT: Map \'" << GV->getName()
- << "\' to [" << Addr << "]\n";);
- void *&CurVal = EEState.getGlobalAddressMap()[GV];
+ DEBUG(dbgs() << "JIT: Map \'" << Name << "\' to [" << Addr << "]\n";);
+ uint64_t &CurVal = EEState.getGlobalAddressMap()[Name];
assert((!CurVal || !Addr) && "GlobalMapping already established!");
CurVal = Addr;
// If we are using the reverse mapping, add it too.
if (!EEState.getGlobalAddressReverseMap().empty()) {
- AssertingVH<const GlobalValue> &V =
- EEState.getGlobalAddressReverseMap()[Addr];
- assert((!V || !GV) && "GlobalMapping already established!");
- V = GV;
+ std::string &V = EEState.getGlobalAddressReverseMap()[CurVal];
+ assert((!V.empty() || !Name.empty()) &&
+ "GlobalMapping already established!");
+ V = Name;
}
}
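
With this change the global address map is keyed by mangled symbol name and stores raw uint64_t addresses, so a mapping can be installed either through a GlobalValue (whose name is mangled first) or directly by string. A minimal sketch for illustration only; the symbol name "external_hook" and the helper installMapping are hypothetical:

  // Illustrative sketch: both calls land in the same name-keyed
  // GlobalAddressMap after this change.
  void installMapping(ExecutionEngine &EE, const GlobalValue *GV, void *Impl) {
    EE.addGlobalMapping(GV, Impl);                         // mangles GV's name
    EE.addGlobalMapping("external_hook", (uint64_t)Impl);  // name-based overload
  }
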
@@ -197,13 +214,19 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
MutexGuard locked(lock);
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
- EEState.RemoveMapping(FI);
+ EEState.RemoveMapping(getMangledName(FI));
for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
GI != GE; ++GI)
- EEState.RemoveMapping(GI);
+ EEState.RemoveMapping(getMangledName(GI));
+}
+
+uint64_t ExecutionEngine::updateGlobalMapping(const GlobalValue *GV,
+ void *Addr) {
+ MutexGuard locked(lock);
+ return updateGlobalMapping(getMangledName(GV), (uint64_t) Addr);
}
-void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
+uint64_t ExecutionEngine::updateGlobalMapping(StringRef Name, uint64_t Addr) {
MutexGuard locked(lock);
ExecutionEngineState::GlobalAddressMapTy &Map =
@@ -211,10 +234,10 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
// Deleting from the mapping?
if (!Addr)
- return EEState.RemoveMapping(GV);
+ return EEState.RemoveMapping(Name);
- void *&CurVal = Map[GV];
- void *OldVal = CurVal;
+ uint64_t &CurVal = Map[Name];
+ uint64_t OldVal = CurVal;
if (CurVal && !EEState.getGlobalAddressReverseMap().empty())
EEState.getGlobalAddressReverseMap().erase(CurVal);
@@ -222,20 +245,35 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
// If we are using the reverse mapping, add it too.
if (!EEState.getGlobalAddressReverseMap().empty()) {
- AssertingVH<const GlobalValue> &V =
- EEState.getGlobalAddressReverseMap()[Addr];
- assert((!V || !GV) && "GlobalMapping already established!");
- V = GV;
+ std::string &V = EEState.getGlobalAddressReverseMap()[CurVal];
+ assert((!V.empty() || !Name.empty()) &&
+ "GlobalMapping already established!");
+ V = Name;
}
return OldVal;
}
-void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
+uint64_t ExecutionEngine::getAddressToGlobalIfAvailable(StringRef S) {
MutexGuard locked(lock);
-
+ uint64_t Address = 0;
ExecutionEngineState::GlobalAddressMapTy::iterator I =
- EEState.getGlobalAddressMap().find(GV);
- return I != EEState.getGlobalAddressMap().end() ? I->second : nullptr;
+ EEState.getGlobalAddressMap().find(S);
+ if (I != EEState.getGlobalAddressMap().end())
+ Address = I->second;
+ return Address;
+}
+
+
+void *ExecutionEngine::getPointerToGlobalIfAvailable(StringRef S) {
+ MutexGuard locked(lock);
+ if (void* Address = (void *) getAddressToGlobalIfAvailable(S))
+ return Address;
+ return nullptr;
+}
+
+void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
+ MutexGuard locked(lock);
+ return getPointerToGlobalIfAvailable(getMangledName(GV));
}
const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
@@ -244,15 +282,25 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
// If we haven't computed the reverse mapping yet, do so first.
if (EEState.getGlobalAddressReverseMap().empty()) {
for (ExecutionEngineState::GlobalAddressMapTy::iterator
- I = EEState.getGlobalAddressMap().begin(),
- E = EEState.getGlobalAddressMap().end(); I != E; ++I)
+ I = EEState.getGlobalAddressMap().begin(),
+ E = EEState.getGlobalAddressMap().end(); I != E; ++I) {
+ StringRef Name = I->first();
+ uint64_t Addr = I->second;
EEState.getGlobalAddressReverseMap().insert(std::make_pair(
- I->second, I->first));
+ Addr, Name));
+ }
}
- std::map<void *, AssertingVH<const GlobalValue> >::iterator I =
- EEState.getGlobalAddressReverseMap().find(Addr);
- return I != EEState.getGlobalAddressReverseMap().end() ? I->second : nullptr;
+ std::map<uint64_t, std::string>::iterator I =
+ EEState.getGlobalAddressReverseMap().find((uint64_t) Addr);
+
+ if (I != EEState.getGlobalAddressReverseMap().end()) {
+ StringRef Name = I->second;
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i)
+ if (GlobalValue *GV = Modules[i]->getNamedValue(Name))
+ return GV;
+ }
+ return nullptr;
}
namespace {
@@ -404,8 +452,9 @@ EngineBuilder::EngineBuilder() : EngineBuilder(nullptr) {}
EngineBuilder::EngineBuilder(std::unique_ptr<Module> M)
: M(std::move(M)), WhichEngine(EngineKind::Either), ErrorStr(nullptr),
- OptLevel(CodeGenOpt::Default), MCJMM(nullptr), RelocModel(Reloc::Default),
- CMModel(CodeModel::JITDefault), UseOrcMCJITReplacement(false) {
+ OptLevel(CodeGenOpt::Default), MemMgr(nullptr), Resolver(nullptr),
+ RelocModel(Reloc::Default), CMModel(CodeModel::JITDefault),
+ UseOrcMCJITReplacement(false) {
// IR module verification is enabled by default in debug builds, and disabled
// by default in release builds.
#ifndef NDEBUG
@@ -419,7 +468,21 @@ EngineBuilder::~EngineBuilder() = default;
EngineBuilder &EngineBuilder::setMCJITMemoryManager(
std::unique_ptr<RTDyldMemoryManager> mcjmm) {
- MCJMM = std::move(mcjmm);
+ auto SharedMM = std::shared_ptr<RTDyldMemoryManager>(std::move(mcjmm));
+ MemMgr = SharedMM;
+ Resolver = SharedMM;
+ return *this;
+}
+
+EngineBuilder&
+EngineBuilder::setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM) {
+ MemMgr = std::shared_ptr<MCJITMemoryManager>(std::move(MM));
+ return *this;
+}
+
+EngineBuilder&
+EngineBuilder::setSymbolResolver(std::unique_ptr<RuntimeDyld::SymbolResolver> SR) {
+ Resolver = std::shared_ptr<RuntimeDyld::SymbolResolver>(std::move(SR));
return *this;
}
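
EngineBuilder now carries the memory manager and the symbol resolver as two separate shared pointers; the legacy setMCJITMemoryManager entry point simply installs a single RTDyldMemoryManager as both. A rough usage sketch, not part of this patch, assuming only the setters visible here plus SectionMemoryManager and EngineBuilder::create():

  // Illustrative sketch: configuring MCJIT with the split interfaces.
  std::unique_ptr<Module> M = /* module to JIT, elided */ nullptr;
  EngineBuilder EB(std::move(M));
  // Legacy path: one object provides both allocation and symbol resolution.
  EB.setMCJITMemoryManager(llvm::make_unique<SectionMemoryManager>());
  // New path (alternative): supply the two roles independently, e.g.
  //   EB.setMemoryManager(std::move(MyMemMgr));
  //   EB.setSymbolResolver(std::move(MyResolver));
  ExecutionEngine *EE = EB.create();
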
@@ -434,7 +497,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
// If the user specified a memory manager but didn't specify which engine to
// create, we assume they only want the JIT, and we fail if they only want
// the interpreter.
- if (MCJMM) {
+ if (MemMgr) {
if (WhichEngine & EngineKind::JIT)
WhichEngine = EngineKind::JIT;
else {
@@ -456,12 +519,13 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
ExecutionEngine *EE = nullptr;
if (ExecutionEngine::OrcMCJITReplacementCtor && UseOrcMCJITReplacement) {
- EE = ExecutionEngine::OrcMCJITReplacementCtor(ErrorStr, std::move(MCJMM),
+ EE = ExecutionEngine::OrcMCJITReplacementCtor(ErrorStr, std::move(MemMgr),
+ std::move(Resolver),
std::move(TheTM));
EE->addModule(std::move(M));
} else if (ExecutionEngine::MCJITCtor)
- EE = ExecutionEngine::MCJITCtor(std::move(M), ErrorStr, std::move(MCJMM),
- std::move(TheTM));
+ EE = ExecutionEngine::MCJITCtor(std::move(M), ErrorStr, std::move(MemMgr),
+ std::move(Resolver), std::move(TheTM));
if (EE) {
EE->setVerifyModules(VerifyModules);
@@ -492,7 +556,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
return getPointerToFunction(F);
MutexGuard locked(lock);
- if (void *P = EEState.getGlobalAddressMap()[GV])
+ if (void* P = getPointerToGlobalIfAvailable(GV))
return P;
// Global variable might have been added since interpreter started.
@@ -502,7 +566,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
else
llvm_unreachable("Global hasn't had an address allocated yet!");
- return EEState.getGlobalAddressMap()[GV];
+ return getPointerToGlobalIfAvailable(GV);
}
/// \brief Converts a Constant* into a GenericValue, including handling of
@@ -1274,25 +1338,3 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
NumInitBytes += (unsigned)GVSize;
++NumGlobals;
}
-
-ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE)
- : EE(EE), GlobalAddressMap(this) {
-}
-
-sys::Mutex *
-ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) {
- return &EES->EE.lock;
-}
-
-void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES,
- const GlobalValue *Old) {
- void *OldVal = EES->GlobalAddressMap.lookup(Old);
- EES->GlobalAddressReverseMap.erase(OldVal);
-}
-
-void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *,
- const GlobalValue *,
- const GlobalValue *) {
- llvm_unreachable("The ExecutionEngine doesn't know how to handle a"
- " RAUW on a value it has a global mapping for.");
-}
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index aaa53f0..22ff311 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -351,7 +351,7 @@ class SimpleBindingMemoryManager : public RTDyldMemoryManager {
public:
SimpleBindingMemoryManager(const SimpleBindingMMFunctions& Functions,
void *Opaque);
- virtual ~SimpleBindingMemoryManager();
+ ~SimpleBindingMemoryManager() override;
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID,
diff --git a/lib/ExecutionEngine/GDBRegistrationListener.cpp b/lib/ExecutionEngine/GDBRegistrationListener.cpp
index 8ef878c..1ab6203 100644
--- a/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -103,7 +103,7 @@ public:
/// Unregisters each object that was previously registered and releases all
/// internal resources.
- virtual ~GDBJITRegistrationListener();
+ ~GDBJITRegistrationListener() override;
/// Creates an entry in the JIT registry for the buffer @p Object,
/// which must contain an object file in executable memory with any
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index aa32452..4135900 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
-#include "EventListenerCommon.h"
#include "IntelJITEventsWrapper.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -29,7 +28,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-using namespace llvm::jitprofiling;
using namespace llvm::object;
#define DEBUG_TYPE "amplifier-jit-event-listener"
@@ -41,7 +39,6 @@ class IntelJITEventListener : public JITEventListener {
std::unique_ptr<IntelJITEventsWrapper> Wrapper;
MethodIDMap MethodIDs;
- FilenameCache Filenames;
typedef SmallVector<const void *, 64> MethodAddressVector;
typedef DenseMap<const void *, MethodAddressVector> ObjectMap;
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 2e8eb16..a26740b 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -316,7 +316,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
#define IMPLEMENT_VECTOR_FCMP(OP) \
case Type::VectorTyID: \
- if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \
IMPLEMENT_VECTOR_FCMP_T(OP, Float); \
} else { \
IMPLEMENT_VECTOR_FCMP_T(OP, Double); \
@@ -363,7 +363,7 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
#define MASK_VECTOR_NANS(TY, X,Y, FLAG) \
if (TY->isVectorTy()) { \
- if (dyn_cast<VectorType>(TY)->getElementType()->isFloatTy()) { \
+ if (cast<VectorType>(TY)->getElementType()->isFloatTy()) { \
MASK_VECTOR_NANS_T(X, Y, Float, FLAG) \
} else { \
MASK_VECTOR_NANS_T(X, Y, Double, FLAG) \
@@ -536,7 +536,7 @@ static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
if(Ty->isVectorTy()) {
assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
Dest.AggregateVal.resize( Src1.AggregateVal.size() );
- if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
Dest.AggregateVal[_i].IntVal = APInt(1,
( (Src1.AggregateVal[_i].FloatVal ==
@@ -567,7 +567,7 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
if(Ty->isVectorTy()) {
assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
Dest.AggregateVal.resize( Src1.AggregateVal.size() );
- if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
Dest.AggregateVal[_i].IntVal = APInt(1,
( (Src1.AggregateVal[_i].FloatVal !=
@@ -713,10 +713,10 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
// Macros to choose appropriate TY: float or double and run operation
// execution
#define FLOAT_VECTOR_OP(OP) { \
- if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) \
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) \
FLOAT_VECTOR_FUNCTION(OP, FloatVal) \
else { \
- if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \
+ if (cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \
FLOAT_VECTOR_FUNCTION(OP, DoubleVal) \
else { \
dbgs() << "Unhandled type for OP instruction: " << *Ty << "\n"; \
@@ -745,12 +745,12 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
case Instruction::FMul: FLOAT_VECTOR_OP(*) break;
case Instruction::FDiv: FLOAT_VECTOR_OP(/) break;
case Instruction::FRem:
- if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy())
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy())
for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
R.AggregateVal[i].FloatVal =
fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal);
else {
- if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy())
+ if (cast<VectorType>(Ty)->getElementType()->isDoubleTy())
for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
R.AggregateVal[i].DoubleVal =
fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal);
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index b022101..e2fe065 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -95,16 +95,15 @@ static ExFunc lookupFunction(const Function *F) {
FunctionType *FT = F->getFunctionType();
for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
ExtName += getTypeID(FT->getContainedType(i));
- ExtName += "_" + F->getName().str();
+ ExtName += ("_" + F->getName()).str();
sys::ScopedLock Writer(*FunctionsLock);
ExFunc FnPtr = (*FuncNames)[ExtName];
if (!FnPtr)
- FnPtr = (*FuncNames)["lle_X_" + F->getName().str()];
+ FnPtr = (*FuncNames)[("lle_X_" + F->getName()).str()];
if (!FnPtr) // Try calling a generic function... if it exists...
- FnPtr = (ExFunc)(intptr_t)
- sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" +
- F->getName().str());
+ FnPtr = (ExFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
+ ("lle_X_" + F->getName()).str());
if (FnPtr)
ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later
return FnPtr;
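The lookupFunction change above switches from std::string concatenation to Twine concatenation of a string literal and a StringRef, so a temporary string is materialized only once by the final .str() call. A minimal sketch of the idiom (the helper name is made up for illustration):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Twine.h"
    #include <string>
    using namespace llvm;

    // ("lle_X_" + Name) builds a lightweight Twine; .str() performs the
    // single allocation that produces the std::string key.
    static std::string makeExternalName(StringRef Name) {
      return ("lle_X_" + Name).str();
    }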
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 2be9c59..0dc0463 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -108,7 +108,7 @@ class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> {
public:
explicit Interpreter(std::unique_ptr<Module> M);
- ~Interpreter();
+ ~Interpreter() override;
/// runAtExitHandlers - Run any functions registered by the program's calls to
/// atexit(3), which we intercept and store in AtExitHandlers.
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 20b8553..7e37afe 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "MCJIT.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/MCJIT.h"
@@ -41,26 +42,35 @@ static struct RegisterJIT {
extern "C" void LLVMLinkInMCJIT() {
}
-ExecutionEngine *MCJIT::createJIT(std::unique_ptr<Module> M,
- std::string *ErrorStr,
- std::unique_ptr<RTDyldMemoryManager> MemMgr,
- std::unique_ptr<TargetMachine> TM) {
+ExecutionEngine*
+MCJIT::createJIT(std::unique_ptr<Module> M,
+ std::string *ErrorStr,
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
+ std::unique_ptr<TargetMachine> TM) {
// Try to register the program as a source of symbols to resolve against.
//
// FIXME: Don't do this here.
sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr);
- std::unique_ptr<RTDyldMemoryManager> MM = std::move(MemMgr);
- if (!MM)
- MM = std::unique_ptr<SectionMemoryManager>(new SectionMemoryManager());
+ if (!MemMgr || !Resolver) {
+ auto RTDyldMM = std::make_shared<SectionMemoryManager>();
+ if (!MemMgr)
+ MemMgr = RTDyldMM;
+ if (!Resolver)
+ Resolver = RTDyldMM;
+ }
- return new MCJIT(std::move(M), std::move(TM), std::move(MM));
+ return new MCJIT(std::move(M), std::move(TM), std::move(MemMgr),
+ std::move(Resolver));
}
MCJIT::MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm,
- std::unique_ptr<RTDyldMemoryManager> MM)
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver)
: ExecutionEngine(std::move(M)), TM(std::move(tm)), Ctx(nullptr),
- MemMgr(this, std::move(MM)), Dyld(&MemMgr), ObjCache(nullptr) {
+ MemMgr(std::move(MemMgr)), Resolver(*this, std::move(Resolver)),
+ Dyld(*this->MemMgr, this->Resolver), ObjCache(nullptr) {
// FIXME: We are managing our modules, so we do not want the base class
// ExecutionEngine to manage them as well. To avoid double destruction
// of the first (and only) module added in ExecutionEngine constructor
@@ -221,7 +231,7 @@ void MCJIT::finalizeLoadedModules() {
Dyld.registerEHFrames();
// Set page permissions.
- MemMgr.finalizeMemory();
+ MemMgr->finalizeMemory();
}
// FIXME: Rename this.
@@ -253,11 +263,11 @@ void MCJIT::finalizeModule(Module *M) {
finalizeLoadedModules();
}
-uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) {
+RuntimeDyld::SymbolInfo MCJIT::findExistingSymbol(const std::string &Name) {
Mangler Mang(TM->getDataLayout());
SmallString<128> FullName;
Mang.getNameWithPrefix(FullName, Name);
- return Dyld.getSymbol(FullName).getAddress();
+ return Dyld.getSymbol(FullName);
}
Module *MCJIT::findModuleForSymbol(const std::string &Name,
@@ -284,14 +294,17 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name,
}
uint64_t MCJIT::getSymbolAddress(const std::string &Name,
- bool CheckFunctionsOnly)
-{
+ bool CheckFunctionsOnly) {
+ return findSymbol(Name, CheckFunctionsOnly).getAddress();
+}
+
+RuntimeDyld::SymbolInfo MCJIT::findSymbol(const std::string &Name,
+ bool CheckFunctionsOnly) {
MutexGuard locked(lock);
// First, check to see if we already have this symbol.
- uint64_t Addr = getExistingSymbolAddress(Name);
- if (Addr)
- return Addr;
+ if (auto Sym = findExistingSymbol(Name))
+ return Sym;
for (object::OwningBinary<object::Archive> &OB : Archives) {
object::Archive *A = OB.getBinary();
@@ -310,9 +323,8 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
// This causes the object file to be loaded.
addObjectFile(std::move(OF));
// The address should be here now.
- Addr = getExistingSymbolAddress(Name);
- if (Addr)
- return Addr;
+ if (auto Sym = findExistingSymbol(Name))
+ return Sym;
}
}
}
@@ -323,15 +335,18 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
generateCodeForModule(M);
// Check the RuntimeDyld table again, it should be there now.
- return getExistingSymbolAddress(Name);
+ return findExistingSymbol(Name);
}
// If a LazyFunctionCreator is installed, use it to get/create the function.
// FIXME: Should we instead have a LazySymbolCreator callback?
- if (LazyFunctionCreator)
- Addr = (uint64_t)LazyFunctionCreator(Name);
+ if (LazyFunctionCreator) {
+ auto Addr = static_cast<uint64_t>(
+ reinterpret_cast<uintptr_t>(LazyFunctionCreator(Name)));
+ return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported);
+ }
- return Addr;
+ return nullptr;
}
uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) {
@@ -528,7 +543,9 @@ GenericValue MCJIT::runFunction(Function *F,
void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) {
if (!isSymbolSearchingDisabled()) {
- void *ptr = MemMgr.getPointerToNamedFunction(Name, false);
+ void *ptr =
+ reinterpret_cast<void*>(
+ static_cast<uintptr_t>(Resolver.findSymbol(Name).getAddress()));
if (ptr)
return ptr;
}
@@ -566,7 +583,7 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) {
void MCJIT::NotifyObjectEmitted(const object::ObjectFile& Obj,
const RuntimeDyld::LoadedObjectInfo &L) {
MutexGuard locked(lock);
- MemMgr.notifyObjectLoaded(this, Obj);
+ MemMgr->notifyObjectLoaded(this, Obj);
for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
EventListeners[I]->NotifyObjectEmitted(Obj, L);
}
@@ -578,15 +595,16 @@ void MCJIT::NotifyFreeingObject(const object::ObjectFile& Obj) {
L->NotifyFreeingObject(Obj);
}
-uint64_t LinkingMemoryManager::getSymbolAddress(const std::string &Name) {
- uint64_t Result = ParentEngine->getSymbolAddress(Name, false);
+RuntimeDyld::SymbolInfo
+LinkingSymbolResolver::findSymbol(const std::string &Name) {
+ auto Result = ParentEngine.findSymbol(Name, false);
// If the symbols wasn't found and it begins with an underscore, try again
// without the underscore.
if (!Result && Name[0] == '_')
- Result = ParentEngine->getSymbolAddress(Name.substr(1), false);
+ Result = ParentEngine.findSymbol(Name.substr(1), false);
if (Result)
return Result;
- if (ParentEngine->isSymbolSearchingDisabled())
- return 0;
- return ClientMM->getSymbolAddress(Name);
+ if (ParentEngine.isSymbolSearchingDisabled())
+ return nullptr;
+ return ClientResolver->findSymbol(Name);
}
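LinkingSymbolResolver::findSymbol above asks the owning MCJIT first, retries without a leading underscore, and only then falls back to the client-supplied resolver. A minimal sketch of what such a client resolver could look like, assuming the RuntimeDyld::SymbolResolver interface exactly as declared in this patch (the symbol table below is hypothetical):

    #include "llvm/ExecutionEngine/RuntimeDyld.h"
    #include <map>
    #include <string>
    using namespace llvm;

    // Hypothetical resolver that serves symbols from a caller-supplied table.
    class TableSymbolResolver : public RuntimeDyld::SymbolResolver {
    public:
      explicit TableSymbolResolver(std::map<std::string, uint64_t> Table)
          : Table(std::move(Table)) {}

      RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override {
        auto I = Table.find(Name);
        if (I == Table.end())
          return nullptr; // Unknown symbol; MCJIT reports it as unresolved.
        return RuntimeDyld::SymbolInfo(I->second, JITSymbolFlags::Exported);
      }

      // Mirror LinkingSymbolResolver: no logical-dylib support.
      RuntimeDyld::SymbolInfo
      findSymbolInLogicalDylib(const std::string &Name) override {
        return nullptr;
      }

    private:
      std::map<std::string, uint64_t> Table;
    };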
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index de4a8f6..59e9949 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -16,6 +16,7 @@
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/IR/Module.h"
@@ -26,59 +27,23 @@ class MCJIT;
// functions across modules that it owns. It aggregates the memory manager
// that is passed in to the MCJIT constructor and defers most functionality
// to that object.
-class LinkingMemoryManager : public RTDyldMemoryManager {
+class LinkingSymbolResolver : public RuntimeDyld::SymbolResolver {
public:
- LinkingMemoryManager(MCJIT *Parent,
- std::unique_ptr<RTDyldMemoryManager> MM)
- : ParentEngine(Parent), ClientMM(std::move(MM)) {}
+ LinkingSymbolResolver(MCJIT &Parent,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver)
+ : ParentEngine(Parent), ClientResolver(std::move(Resolver)) {}
- uint64_t getSymbolAddress(const std::string &Name) override;
+ RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override;
- // Functions deferred to client memory manager
- uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID,
- StringRef SectionName) override {
- return ClientMM->allocateCodeSection(Size, Alignment, SectionID, SectionName);
- }
-
- uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID, StringRef SectionName,
- bool IsReadOnly) override {
- return ClientMM->allocateDataSection(Size, Alignment,
- SectionID, SectionName, IsReadOnly);
- }
-
- void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO,
- uintptr_t DataSizeRW) override {
- return ClientMM->reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW);
- }
-
- bool needsToReserveAllocationSpace() override {
- return ClientMM->needsToReserveAllocationSpace();
- }
-
- void notifyObjectLoaded(ExecutionEngine *EE,
- const object::ObjectFile &Obj) override {
- ClientMM->notifyObjectLoaded(EE, Obj);
- }
-
- void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
- size_t Size) override {
- ClientMM->registerEHFrames(Addr, LoadAddr, Size);
- }
-
- void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr,
- size_t Size) override {
- ClientMM->deregisterEHFrames(Addr, LoadAddr, Size);
- }
-
- bool finalizeMemory(std::string *ErrMsg = nullptr) override {
- return ClientMM->finalizeMemory(ErrMsg);
+ // MCJIT doesn't support logical dylibs.
+ RuntimeDyld::SymbolInfo
+ findSymbolInLogicalDylib(const std::string &Name) override {
+ return nullptr;
}
private:
- MCJIT *ParentEngine;
- std::unique_ptr<RTDyldMemoryManager> ClientMM;
+ MCJIT &ParentEngine;
+ std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver;
};
// About Module states: added->loaded->finalized.
@@ -103,7 +68,8 @@ private:
class MCJIT : public ExecutionEngine {
MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm,
- std::unique_ptr<RTDyldMemoryManager> MemMgr);
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver);
typedef llvm::SmallPtrSet<Module *, 4> ModulePtrSet;
@@ -214,7 +180,8 @@ class MCJIT : public ExecutionEngine {
std::unique_ptr<TargetMachine> TM;
MCContext *Ctx;
- LinkingMemoryManager MemMgr;
+ std::shared_ptr<MCJITMemoryManager> MemMgr;
+ LinkingSymbolResolver Resolver;
RuntimeDyld Dyld;
std::vector<JITEventListener*> EventListeners;
@@ -238,7 +205,7 @@ class MCJIT : public ExecutionEngine {
ModulePtrSet::iterator E);
public:
- ~MCJIT();
+ ~MCJIT() override;
/// @name ExecutionEngine interface implementation
/// @{
@@ -324,17 +291,22 @@ public:
MCJITCtor = createJIT;
}
- static ExecutionEngine *createJIT(std::unique_ptr<Module> M,
- std::string *ErrorStr,
- std::unique_ptr<RTDyldMemoryManager> MemMgr,
- std::unique_ptr<TargetMachine> TM);
+ static ExecutionEngine*
+ createJIT(std::unique_ptr<Module> M,
+ std::string *ErrorStr,
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
+ std::unique_ptr<TargetMachine> TM);
// @}
+ RuntimeDyld::SymbolInfo findSymbol(const std::string &Name,
+ bool CheckFunctionsOnly);
+ // DEPRECATED - Please use findSymbol instead.
// This is not directly exposed via the ExecutionEngine API, but it is
// used by the LinkingMemoryManager.
uint64_t getSymbolAddress(const std::string &Name,
- bool CheckFunctionsOnly);
+ bool CheckFunctionsOnly);
protected:
/// emitObject -- Generate a JITed object in memory from the specified module
@@ -348,7 +320,7 @@ protected:
const RuntimeDyld::LoadedObjectInfo &L);
void NotifyFreeingObject(const object::ObjectFile& Obj);
- uint64_t getExistingSymbolAddress(const std::string &Name);
+ RuntimeDyld::SymbolInfo findExistingSymbol(const std::string &Name);
Module *findModuleForSymbol(const std::string &Name,
bool CheckFunctionsOnly);
};
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 9ab4003..23e7662 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
-#include "EventListenerCommon.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/OProfileWrapper.h"
@@ -28,7 +27,6 @@
#include <fcntl.h>
using namespace llvm;
-using namespace llvm::jitprofiling;
using namespace llvm::object;
#define DEBUG_TYPE "oprofile-jit-event-listener"
diff --git a/lib/ExecutionEngine/Orc/Android.mk b/lib/ExecutionEngine/Orc/Android.mk
index 61c1daf..f28f359 100644
--- a/lib/ExecutionEngine/Orc/Android.mk
+++ b/lib/ExecutionEngine/Orc/Android.mk
@@ -6,6 +6,7 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
CloneSubModule.cpp \
+ ExecutionUtils.cpp \
IndirectionUtils.cpp \
OrcMCJITReplacement.cpp \
OrcTargetSupport.cpp
diff --git a/lib/ExecutionEngine/Orc/CMakeLists.txt b/lib/ExecutionEngine/Orc/CMakeLists.txt
index b0a8445..b38b459 100644
--- a/lib/ExecutionEngine/Orc/CMakeLists.txt
+++ b/lib/ExecutionEngine/Orc/CMakeLists.txt
@@ -1,5 +1,6 @@
add_llvm_library(LLVMOrcJIT
CloneSubModule.cpp
+ ExecutionUtils.cpp
IndirectionUtils.cpp
OrcMCJITReplacement.cpp
OrcTargetSupport.cpp
diff --git a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
new file mode 100644
index 0000000..b7220db
--- /dev/null
+++ b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -0,0 +1,102 @@
+//===---- ExecutionUtils.cpp - Utilities for executing functions in Orc ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+
+namespace llvm {
+namespace orc {
+
+CtorDtorIterator::CtorDtorIterator(const GlobalVariable *GV, bool End)
+ : InitList(
+ GV ? dyn_cast_or_null<ConstantArray>(GV->getInitializer()) : nullptr),
+ I((InitList && End) ? InitList->getNumOperands() : 0) {
+}
+
+bool CtorDtorIterator::operator==(const CtorDtorIterator &Other) const {
+ assert(InitList == Other.InitList && "Incomparable iterators.");
+ return I == Other.I;
+}
+
+bool CtorDtorIterator::operator!=(const CtorDtorIterator &Other) const {
+ return !(*this == Other);
+}
+
+CtorDtorIterator& CtorDtorIterator::operator++() {
+ ++I;
+ return *this;
+}
+
+CtorDtorIterator CtorDtorIterator::operator++(int) {
+ CtorDtorIterator Temp = *this;
+ ++I;
+ return Temp;
+}
+
+CtorDtorIterator::Element CtorDtorIterator::operator*() const {
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(I));
+ assert(CS && "Unrecognized type in llvm.global_ctors/llvm.global_dtors");
+
+ Constant *FuncC = CS->getOperand(1);
+ Function *Func = nullptr;
+
+ // Extract function pointer, pulling off any casts.
+ while (FuncC) {
+ if (Function *F = dyn_cast_or_null<Function>(FuncC)) {
+ Func = F;
+ break;
+ } else if (ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(FuncC)) {
+ if (CE->isCast())
+ FuncC = dyn_cast_or_null<ConstantExpr>(CE->getOperand(0));
+ else
+ break;
+ } else {
+ // This isn't anything we recognize. Bail out with Func left set to null.
+ break;
+ }
+ }
+
+ ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ Value *Data = CS->getOperand(2);
+ return Element(Priority->getZExtValue(), Func, Data);
+}
+
+iterator_range<CtorDtorIterator> getConstructors(const Module &M) {
+ const GlobalVariable *CtorsList = M.getNamedGlobal("llvm.global_ctors");
+ return make_range(CtorDtorIterator(CtorsList, false),
+ CtorDtorIterator(CtorsList, true));
+}
+
+iterator_range<CtorDtorIterator> getDestructors(const Module &M) {
+ const GlobalVariable *DtorsList = M.getNamedGlobal("llvm.global_dtors");
+ return make_range(CtorDtorIterator(DtorsList, false),
+ CtorDtorIterator(DtorsList, true));
+}
+
+void LocalCXXRuntimeOverrides::runDestructors() {
+ auto& CXXDestructorDataPairs = DSOHandleOverride;
+ for (auto &P : CXXDestructorDataPairs)
+ P.first(P.second);
+ CXXDestructorDataPairs.clear();
+}
+
+int LocalCXXRuntimeOverrides::CXAAtExitOverride(DestructorPtr Destructor,
+ void *Arg, void *DSOHandle) {
+ auto& CXXDestructorDataPairs =
+ *reinterpret_cast<CXXDestructorDataPairList*>(DSOHandle);
+ CXXDestructorDataPairs.push_back(std::make_pair(Destructor, Arg));
+ return 0;
+}
+
+} // End namespace orc.
+} // End namespace llvm.
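ExecutionUtils.cpp above adds iteration over llvm.global_ctors/llvm.global_dtors plus a process-local __cxa_atexit override. A minimal usage sketch for the constructor side, assuming a loaded Module M and some way to turn each Function into a callable address (resolveCtorAddress is a hypothetical placeholder):

    #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;
    using namespace llvm::orc;

    // Hypothetical: maps a constructor Function to its JITed entry point.
    typedef void (*CtorFn)();
    CtorFn resolveCtorAddress(Function *F);

    static void runStaticConstructors(Module &M) {
      for (CtorDtorIterator::Element E : getConstructors(M)) {
        // Each element carries (Priority, Func, Data); Func is null when the
        // entry was not a recognizable function pointer.
        if (!E.Func)
          continue;
        if (CtorFn Ctor = resolveCtorAddress(E.Func))
          Ctor();
      }
    }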
diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 8cf490f..ebeedef 100644
--- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -14,17 +14,25 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/IRBuilder.h"
#include <set>
+#include <sstream>
namespace llvm {
namespace orc {
-GlobalVariable* createImplPointer(Function &F, const Twine &Name,
- Constant *Initializer) {
- assert(F.getParent() && "Function isn't in a module.");
+Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) {
+ Constant *AddrIntVal =
+ ConstantInt::get(Type::getInt64Ty(FT.getContext()), Addr);
+ Constant *AddrPtrVal =
+ ConstantExpr::getCast(Instruction::IntToPtr, AddrIntVal,
+ PointerType::get(&FT, 0));
+ return AddrPtrVal;
+}
+
+GlobalVariable* createImplPointer(PointerType &PT, Module &M,
+ const Twine &Name, Constant *Initializer) {
if (!Initializer)
- Initializer = Constant::getNullValue(F.getType());
- Module &M = *F.getParent();
- return new GlobalVariable(M, F.getType(), false, GlobalValue::ExternalLinkage,
+ Initializer = Constant::getNullValue(&PT);
+ return new GlobalVariable(M, &PT, false, GlobalValue::ExternalLinkage,
Initializer, Name, nullptr,
GlobalValue::NotThreadLocal, 0, true);
}
@@ -44,8 +52,41 @@ void makeStub(Function &F, GlobalVariable &ImplPointer) {
Builder.CreateRet(Call);
}
+// Utility class for renaming global values and functions during partitioning.
+class GlobalRenamer {
+public:
+
+ static bool needsRenaming(const Value &New) {
+ if (!New.hasName() || New.getName().startswith("\01L"))
+ return true;
+ return false;
+ }
+
+ const std::string& getRename(const Value &Orig) {
+ // See if we have a name for this global.
+ {
+ auto I = Names.find(&Orig);
+ if (I != Names.end())
+ return I->second;
+ }
+
+ // Nope. Create a new one.
+ // FIXME: Use a more robust uniquing scheme. (This may blow up if the user
+  // writes a "__orc_anon[[:digit:]]*" method).

+ unsigned ID = Names.size();
+ std::ostringstream NameStream;
+ NameStream << "__orc_anon" << ID++;
+ auto I = Names.insert(std::make_pair(&Orig, NameStream.str()));
+ return I.first->second;
+ }
+private:
+ DenseMap<const Value*, std::string> Names;
+};
+
void partition(Module &M, const ModulePartitionMap &PMap) {
+ GlobalRenamer Renamer;
+
for (auto &KVPair : PMap) {
auto ExtractGlobalVars =
@@ -54,20 +95,26 @@ void partition(Module &M, const ModulePartitionMap &PMap) {
if (KVPair.second.count(&Orig)) {
copyGVInitializer(New, Orig, VMap);
}
- if (New.getLinkage() == GlobalValue::PrivateLinkage) {
+ if (New.hasLocalLinkage()) {
+ if (Renamer.needsRenaming(New))
+ New.setName(Renamer.getRename(Orig));
New.setLinkage(GlobalValue::ExternalLinkage);
New.setVisibility(GlobalValue::HiddenVisibility);
}
+ assert(!Renamer.needsRenaming(New) && "Invalid global name.");
};
auto ExtractFunctions =
[&](Function &New, const Function &Orig, ValueToValueMapTy &VMap) {
if (KVPair.second.count(&Orig))
copyFunctionBody(New, Orig, VMap);
- if (New.getLinkage() == GlobalValue::InternalLinkage) {
+ if (New.hasLocalLinkage()) {
+ if (Renamer.needsRenaming(New))
+ New.setName(Renamer.getRename(Orig));
New.setLinkage(GlobalValue::ExternalLinkage);
New.setVisibility(GlobalValue::HiddenVisibility);
}
+ assert(!Renamer.needsRenaming(New) && "Invalid function name.");
};
CloneSubModule(*KVPair.first, M, ExtractGlobalVars, ExtractFunctions,
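With the signatures above, createIRTypedAddress builds a typed constant from a raw TargetAddress and createImplPointer now takes an explicit pointer type and module. A minimal sketch of wiring a stub for a function declaration F whose body will live at Addr (both are assumed to be supplied by the caller; the "$impl_ptr" suffix is arbitrary):

    #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;
    using namespace llvm::orc;

    // Sketch: route calls to the declaration F through an implementation
    // pointer that initially targets Addr (e.g. a compile callback).
    static void emitStubFor(Module &M, Function &F, TargetAddress Addr) {
      // makeStub expects F to be a declaration; it fills in a forwarding body.
      Constant *Init = createIRTypedAddress(*F.getFunctionType(), Addr);
      GlobalVariable *ImplPtr =
          createImplPointer(*F.getType(), M, F.getName() + "$impl_ptr", Init);
      makeStub(F, *ImplPtr);
    }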
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 00e39bb..4023344 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -26,15 +26,21 @@ namespace orc {
class OrcMCJITReplacement : public ExecutionEngine {
- class ForwardingRTDyldMM : public RTDyldMemoryManager {
+ // OrcMCJITReplacement needs to do a little extra book-keeping to ensure that
+ // Orc's automatic finalization doesn't kick in earlier than MCJIT clients are
+ // expecting - see finalizeMemory.
+ class MCJITReplacementMemMgr : public MCJITMemoryManager {
public:
- ForwardingRTDyldMM(OrcMCJITReplacement &M) : M(M) {}
+ MCJITReplacementMemMgr(OrcMCJITReplacement &M,
+ std::shared_ptr<MCJITMemoryManager> ClientMM)
+ : M(M), ClientMM(std::move(ClientMM)) {}
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID,
StringRef SectionName) override {
uint8_t *Addr =
- M.MM->allocateCodeSection(Size, Alignment, SectionID, SectionName);
+ ClientMM->allocateCodeSection(Size, Alignment, SectionID,
+ SectionName);
M.SectionsAllocatedSinceLastLoad.insert(Addr);
return Addr;
}
@@ -42,43 +48,35 @@ class OrcMCJITReplacement : public ExecutionEngine {
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID, StringRef SectionName,
bool IsReadOnly) override {
- uint8_t *Addr = M.MM->allocateDataSection(Size, Alignment, SectionID,
- SectionName, IsReadOnly);
+ uint8_t *Addr = ClientMM->allocateDataSection(Size, Alignment, SectionID,
+ SectionName, IsReadOnly);
M.SectionsAllocatedSinceLastLoad.insert(Addr);
return Addr;
}
void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO,
uintptr_t DataSizeRW) override {
- return M.MM->reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW);
+ return ClientMM->reserveAllocationSpace(CodeSize, DataSizeRO,
+ DataSizeRW);
}
bool needsToReserveAllocationSpace() override {
- return M.MM->needsToReserveAllocationSpace();
+ return ClientMM->needsToReserveAllocationSpace();
}
void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
size_t Size) override {
- return M.MM->registerEHFrames(Addr, LoadAddr, Size);
+ return ClientMM->registerEHFrames(Addr, LoadAddr, Size);
}
void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr,
size_t Size) override {
- return M.MM->deregisterEHFrames(Addr, LoadAddr, Size);
- }
-
- uint64_t getSymbolAddress(const std::string &Name) override {
- return M.getSymbolAddressWithoutMangling(Name);
- }
-
- void *getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure = true) override {
- return M.MM->getPointerToNamedFunction(Name, AbortOnFailure);
+ return ClientMM->deregisterEHFrames(Addr, LoadAddr, Size);
}
void notifyObjectLoaded(ExecutionEngine *EE,
const object::ObjectFile &O) override {
- return M.MM->notifyObjectLoaded(EE, O);
+ return ClientMM->notifyObjectLoaded(EE, O);
}
bool finalizeMemory(std::string *ErrMsg = nullptr) override {
@@ -96,21 +94,41 @@ class OrcMCJITReplacement : public ExecutionEngine {
// get more than one set of objects loaded but not yet finalized is if
// they were loaded during relocation of another set.
if (M.UnfinalizedSections.size() == 1)
- return M.MM->finalizeMemory(ErrMsg);
+ return ClientMM->finalizeMemory(ErrMsg);
return false;
}
private:
OrcMCJITReplacement &M;
+ std::shared_ptr<MCJITMemoryManager> ClientMM;
+ };
+
+ class LinkingResolver : public RuntimeDyld::SymbolResolver {
+ public:
+ LinkingResolver(OrcMCJITReplacement &M) : M(M) {}
+
+ RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override {
+ return M.findMangledSymbol(Name);
+ }
+
+ RuntimeDyld::SymbolInfo
+ findSymbolInLogicalDylib(const std::string &Name) override {
+ return M.ClientResolver->findSymbolInLogicalDylib(Name);
+ }
+
+ private:
+ OrcMCJITReplacement &M;
};
private:
static ExecutionEngine *
createOrcMCJITReplacement(std::string *ErrorMsg,
- std::unique_ptr<RTDyldMemoryManager> OrcJMM,
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
std::unique_ptr<TargetMachine> TM) {
- return new OrcMCJITReplacement(std::move(OrcJMM), std::move(TM));
+ return new OrcMCJITReplacement(std::move(MemMgr), std::move(Resolver),
+ std::move(TM));
}
public:
@@ -118,12 +136,15 @@ public:
OrcMCJITReplacementCtor = createOrcMCJITReplacement;
}
- OrcMCJITReplacement(std::unique_ptr<RTDyldMemoryManager> MM,
- std::unique_ptr<TargetMachine> TM)
- : TM(std::move(TM)), MM(std::move(MM)), Mang(this->TM->getDataLayout()),
+ OrcMCJITReplacement(
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver,
+ std::unique_ptr<TargetMachine> TM)
+ : TM(std::move(TM)), MemMgr(*this, std::move(MemMgr)),
+ Resolver(*this), ClientResolver(std::move(ClientResolver)),
+ Mang(this->TM->getDataLayout()),
NotifyObjectLoaded(*this), NotifyFinalized(*this),
- ObjectLayer(ObjectLayerT::CreateRTDyldMMFtor(), NotifyObjectLoaded,
- NotifyFinalized),
+ ObjectLayer(NotifyObjectLoaded, NotifyFinalized),
CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)),
LazyEmitLayer(CompileLayer) {
setDataLayout(this->TM->getDataLayout());
@@ -139,15 +160,13 @@ public:
Modules.push_back(std::move(M));
std::vector<Module *> Ms;
Ms.push_back(&*Modules.back());
- LazyEmitLayer.addModuleSet(std::move(Ms),
- llvm::make_unique<ForwardingRTDyldMM>(*this));
+ LazyEmitLayer.addModuleSet(std::move(Ms), &MemMgr, &Resolver);
}
void addObjectFile(std::unique_ptr<object::ObjectFile> O) override {
std::vector<std::unique_ptr<object::ObjectFile>> Objs;
Objs.push_back(std::move(O));
- ObjectLayer.addObjectSet(std::move(Objs),
- llvm::make_unique<ForwardingRTDyldMM>(*this));
+ ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
}
void addObjectFile(object::OwningBinary<object::ObjectFile> O) override {
@@ -157,8 +176,7 @@ public:
std::vector<std::unique_ptr<object::ObjectFile>> Objs;
Objs.push_back(std::move(Obj));
auto H =
- ObjectLayer.addObjectSet(std::move(Objs),
- llvm::make_unique<ForwardingRTDyldMM>(*this));
+ ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
std::vector<std::unique_ptr<MemoryBuffer>> Bufs;
Bufs.push_back(std::move(Buf));
@@ -170,7 +188,11 @@ public:
}
uint64_t getSymbolAddress(StringRef Name) {
- return getSymbolAddressWithoutMangling(Mangle(Name));
+ return findSymbol(Name).getAddress();
+ }
+
+ RuntimeDyld::SymbolInfo findSymbol(StringRef Name) {
+ return findMangledSymbol(Mangle(Name));
}
void finalizeObject() override {
@@ -214,18 +236,19 @@ public:
}
private:
- uint64_t getSymbolAddressWithoutMangling(StringRef Name) {
- if (uint64_t Addr = LazyEmitLayer.findSymbol(Name, false).getAddress())
- return Addr;
- if (uint64_t Addr = MM->getSymbolAddress(Name))
- return Addr;
- if (uint64_t Addr = scanArchives(Name))
- return Addr;
- return 0;
+ RuntimeDyld::SymbolInfo findMangledSymbol(StringRef Name) {
+ if (auto Sym = LazyEmitLayer.findSymbol(Name, false))
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
+ if (auto Sym = ClientResolver->findSymbol(Name))
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
+ if (auto Sym = scanArchives(Name))
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
+
+ return nullptr;
}
- uint64_t scanArchives(StringRef Name) {
+ JITSymbol scanArchives(StringRef Name) {
for (object::OwningBinary<object::Archive> &OB : Archives) {
object::Archive *A = OB.getBinary();
// Look for our symbols in each Archive
@@ -241,14 +264,13 @@ private:
std::vector<std::unique_ptr<object::ObjectFile>> ObjSet;
ObjSet.push_back(std::unique_ptr<object::ObjectFile>(
static_cast<object::ObjectFile *>(ChildBin.release())));
- ObjectLayer.addObjectSet(
- std::move(ObjSet), llvm::make_unique<ForwardingRTDyldMM>(*this));
- if (uint64_t Addr = ObjectLayer.findSymbol(Name, true).getAddress())
- return Addr;
+ ObjectLayer.addObjectSet(std::move(ObjSet), &MemMgr, &Resolver);
+ if (auto Sym = ObjectLayer.findSymbol(Name, true))
+ return Sym;
}
}
}
- return 0;
+ return nullptr;
}
class NotifyObjectLoadedT {
@@ -267,7 +289,7 @@ private:
assert(Objects.size() == Infos.size() &&
"Incorrect number of Infos for Objects.");
for (unsigned I = 0; I < Objects.size(); ++I)
- M.MM->notifyObjectLoaded(&M, *Objects[I]);
+ M.MemMgr.notifyObjectLoaded(&M, *Objects[I]);
};
private:
@@ -299,7 +321,9 @@ private:
typedef LazyEmittingLayer<CompileLayerT> LazyEmitLayerT;
std::unique_ptr<TargetMachine> TM;
- std::unique_ptr<RTDyldMemoryManager> MM;
+ MCJITReplacementMemMgr MemMgr;
+ LinkingResolver Resolver;
+ std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver;
Mangler Mang;
NotifyObjectLoadedT NotifyObjectLoaded;
diff --git a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
index 6fe5301..fc56e67 100644
--- a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
+++ b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
@@ -6,39 +6,6 @@ using namespace llvm::orc;
namespace {
-std::array<const char *, 12> X86GPRsToSave = {{
- "rbp", "rbx", "r12", "r13", "r14", "r15", // Callee saved.
- "rdi", "rsi", "rdx", "rcx", "r8", "r9", // Int args.
-}};
-
-std::array<const char *, 8> X86XMMsToSave = {{
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" // FP args
-}};
-
-template <typename OStream> unsigned saveX86Regs(OStream &OS) {
- for (const auto &GPR : X86GPRsToSave)
- OS << " pushq %" << GPR << "\n";
-
- OS << " subq $" << (16 * X86XMMsToSave.size()) << ", %rsp\n";
-
- for (unsigned i = 0; i < X86XMMsToSave.size(); ++i)
- OS << " movdqu %" << X86XMMsToSave[i] << ", "
- << (16 * (X86XMMsToSave.size() - i - 1)) << "(%rsp)\n";
-
- return (8 * X86GPRsToSave.size()) + (16 * X86XMMsToSave.size());
-}
-
-template <typename OStream> void restoreX86Regs(OStream &OS) {
- for (unsigned i = 0; i < X86XMMsToSave.size(); ++i)
- OS << " movdqu " << (16 * i) << "(%rsp), %"
- << X86XMMsToSave[(X86XMMsToSave.size() - i - 1)] << "\n";
- OS << " addq $" << (16 * X86XMMsToSave.size()) << ", %rsp\n";
-
- for (unsigned i = 0; i < X86GPRsToSave.size(); ++i)
- OS << " popq %" << X86GPRsToSave[X86GPRsToSave.size() - i - 1] << "\n";
-}
-
-template <typename TargetT>
uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM,
TargetAddress CallbackID) {
return JCBM->executeCompileCallback(CallbackID);
@@ -53,14 +20,28 @@ const char* OrcX86_64::ResolverBlockName = "orc_resolver_block";
void OrcX86_64::insertResolverBlock(
Module &M, JITCompileCallbackManagerBase &JCBM) {
+
+ // Trampoline code-sequence length, used to get trampoline address from return
+ // address.
const unsigned X86_64_TrampolineLength = 6;
- auto CallbackPtr = executeCompileCallback<OrcX86_64>;
+
+ // List of x86-64 GPRs to save. Note - RBP saved separately below.
+ std::array<const char *, 14> GPRs = {{
+ "rax", "rbx", "rcx", "rdx",
+ "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13",
+ "r14", "r15"
+ }};
+
+ // Address of the executeCompileCallback function.
uint64_t CallbackAddr =
- static_cast<uint64_t>(reinterpret_cast<uintptr_t>(CallbackPtr));
+ static_cast<uint64_t>(
+ reinterpret_cast<uintptr_t>(executeCompileCallback));
std::ostringstream AsmStream;
Triple TT(M.getTargetTriple());
+ // Switch to text section.
if (TT.getOS() == Triple::Darwin)
AsmStream << ".section __TEXT,__text,regular,pure_instructions\n"
<< ".align 4, 0x90\n";
@@ -68,24 +49,51 @@ void OrcX86_64::insertResolverBlock(
AsmStream << ".text\n"
<< ".align 16, 0x90\n";
+ // Bake in a pointer to the callback manager immediately before the
+ // start of the resolver function.
AsmStream << "jit_callback_manager_addr:\n"
- << " .quad " << &JCBM << "\n"
- << ResolverBlockName << ":\n";
-
- uint64_t ReturnAddrOffset = saveX86Regs(AsmStream);
-
- // Compute index, load object address, and call JIT.
- AsmStream << " leaq jit_callback_manager_addr(%rip), %rdi\n"
+ << " .quad " << &JCBM << "\n";
+
+ // Start the resolver function.
+ AsmStream << ResolverBlockName << ":\n"
+ << " pushq %rbp\n"
+ << " movq %rsp, %rbp\n";
+
+ // Store the GPRs.
+ for (const auto &GPR : GPRs)
+ AsmStream << " pushq %" << GPR << "\n";
+
+ // Store floating-point state with FXSAVE.
+ // Note: We need to keep the stack 16-byte aligned, so if we've emitted an odd
+ // number of 64-bit pushes so far (GPRs.size() plus 1 for RBP) then add
+ // an extra 64 bits of padding to the FXSave area.
+ unsigned Padding = (GPRs.size() + 1) % 2 ? 8 : 0;
+ unsigned FXSaveSize = 512 + Padding;
+ AsmStream << " subq $" << FXSaveSize << ", %rsp\n"
+ << " fxsave (%rsp)\n"
+
+ // Load callback manager address, compute trampoline address, call JIT.
+ << " lea jit_callback_manager_addr(%rip), %rdi\n"
<< " movq (%rdi), %rdi\n"
- << " movq " << ReturnAddrOffset << "(%rsp), %rsi\n"
+ << " movq 0x8(%rbp), %rsi\n"
<< " subq $" << X86_64_TrampolineLength << ", %rsi\n"
<< " movabsq $" << CallbackAddr << ", %rax\n"
<< " callq *%rax\n"
- << " movq %rax, " << ReturnAddrOffset << "(%rsp)\n";
- restoreX86Regs(AsmStream);
+ // Replace the return to the trampoline with the return address of the
+ // compiled function body.
+ << " movq %rax, 0x8(%rbp)\n"
+
+ // Restore the floating point state.
+ << " fxrstor (%rsp)\n"
+ << " addq $" << FXSaveSize << ", %rsp\n";
+
+ for (const auto &GPR : make_range(GPRs.rbegin(), GPRs.rend()))
+ AsmStream << " popq %" << GPR << "\n";
- AsmStream << " retq\n";
+ // Restore original RBP and return to compiled function body.
+ AsmStream << " popq %rbp\n"
+ << " retq\n";
M.appendModuleInlineAsm(AsmStream.str());
}
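The resolver block above keeps the stack 16-byte aligned before FXSAVE: after the prologue push of %rbp and the 14 GPR pushes, an odd total number of 8-byte pushes forces 8 extra bytes of padding in the FXSAVE area. A standalone check of that arithmetic (not part of the patch):

    #include <cassert>
    #include <cstddef>

    int main() {
      const size_t NumGPRs = 14;            // GPRs saved by the resolver block
      // GPR pushes plus the earlier push of %rbp: an odd count leaves %rsp
      // 8 bytes off 16-byte alignment, so pad the FXSAVE area accordingly.
      unsigned Padding = (NumGPRs + 1) % 2 ? 8 : 0;
      unsigned FXSaveSize = 512 + Padding;  // FXSAVE always writes 512 bytes
      assert(Padding == 8 && FXSaveSize == 520);
      return 0;
    }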
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index a0ed7cf..a13ecb7 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -57,7 +57,8 @@ static void dumpSectionMemory(const SectionEntry &S, StringRef State) {
unsigned BytesRemaining = S.Size;
if (StartPadding) {
- dbgs() << "\n" << format("0x%016" PRIx64, LoadAddr & ~(ColsPerRow - 1)) << ":";
+ dbgs() << "\n" << format("0x%016" PRIx64,
+ LoadAddr & ~(uint64_t)(ColsPerRow - 1)) << ":";
while (StartPadding--)
dbgs() << " ";
}
@@ -92,7 +93,7 @@ void RuntimeDyldImpl::resolveRelocations() {
// entry provides the section to which the relocation will be applied.
uint64_t Addr = Sections[i].LoadAddress;
DEBUG(dbgs() << "Resolving relocations Section #" << i << "\t"
- << format("0x%x", Addr) << "\n");
+ << format("%p", (uintptr_t)Addr) << "\n");
DEBUG(dumpSectionMemory(Sections[i], "before relocations"));
resolveRelocationList(Relocations[i], Addr);
DEBUG(dumpSectionMemory(Sections[i], "after relocations"));
@@ -151,10 +152,10 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
// Compute the memory size required to load all sections to be loaded
// and pass this information to the memory manager
- if (MemMgr->needsToReserveAllocationSpace()) {
+ if (MemMgr.needsToReserveAllocationSpace()) {
uint64_t CodeSize = 0, DataSizeRO = 0, DataSizeRW = 0;
computeTotalAllocSize(Obj, CodeSize, DataSizeRO, DataSizeRW);
- MemMgr->reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW);
+ MemMgr.reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW);
}
// Used sections from the object file
@@ -360,19 +361,20 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
if (Name == ".eh_frame")
SectionSize += 4;
- if (SectionSize > 0) {
- // save the total size of the section
- if (IsCode) {
- CodeSectionSizes.push_back(SectionSize);
- } else if (IsReadOnly) {
- ROSectionSizes.push_back(SectionSize);
- } else {
- RWSectionSizes.push_back(SectionSize);
- }
- // update the max alignment
- if (Alignment > MaxAlignment) {
- MaxAlignment = Alignment;
- }
+ if (!SectionSize)
+ SectionSize = 1;
+
+ if (IsCode) {
+ CodeSectionSizes.push_back(SectionSize);
+ } else if (IsReadOnly) {
+ ROSectionSizes.push_back(SectionSize);
+ } else {
+ RWSectionSizes.push_back(SectionSize);
+ }
+
+ // update the max alignment
+ if (Alignment > MaxAlignment) {
+ MaxAlignment = Alignment;
}
}
}
@@ -485,7 +487,7 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
// Skip common symbols already elsewhere.
if (GlobalSymbolTable.count(Name) ||
- MemMgr->getSymbolAddressInLogicalDylib(Name)) {
+ Resolver.findSymbolInLogicalDylib(Name)) {
DEBUG(dbgs() << "\tSkipping already emitted common symbol '" << Name
<< "'\n");
continue;
@@ -502,8 +504,8 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
// Allocate memory for the section
unsigned SectionID = Sections.size();
- uint8_t *Addr = MemMgr->allocateDataSection(CommonSize, sizeof(void *),
- SectionID, StringRef(), false);
+ uint8_t *Addr = MemMgr.allocateDataSection(CommonSize, sizeof(void *),
+ SectionID, StringRef(), false);
if (!Addr)
report_fatal_error("Unable to allocate memory for common symbols!");
uint64_t Offset = 0;
@@ -577,10 +579,12 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
if (IsRequired) {
Check(Section.getContents(data));
Allocate = DataSize + PaddingSize + StubBufSize;
- Addr = IsCode ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID,
- Name)
- : MemMgr->allocateDataSection(Allocate, Alignment, SectionID,
- Name, IsReadOnly);
+ if (!Allocate)
+ Allocate = 1;
+ Addr = IsCode ? MemMgr.allocateCodeSection(Allocate, Alignment, SectionID,
+ Name)
+ : MemMgr.allocateDataSection(Allocate, Alignment, SectionID,
+ Name, IsReadOnly);
if (!Addr)
report_fatal_error("Unable to allocate section memory!");
@@ -787,9 +791,9 @@ void RuntimeDyldImpl::resolveExternalSymbols() {
uint64_t Addr = 0;
RTDyldSymbolTable::const_iterator Loc = GlobalSymbolTable.find(Name);
if (Loc == GlobalSymbolTable.end()) {
- // This is an external symbol, try to get its address from
- // MemoryManager.
- Addr = MemMgr->getSymbolAddress(Name.data());
+ // This is an external symbol, try to get its address from the symbol
+ // resolver.
+ Addr = Resolver.findSymbol(Name.data()).getAddress();
// The call to getSymbolAddress may have caused additional modules to
// be loaded, which may have added new entries to the
// ExternalSymbolRelocations map. Consquently, we need to update our
@@ -810,7 +814,6 @@ void RuntimeDyldImpl::resolveExternalSymbols() {
report_fatal_error("Program used external function '" + Name +
"' which could not be resolved!");
- updateGOTEntries(Name, Addr);
DEBUG(dbgs() << "Resolving relocations Name: " << Name << "\t"
<< format("0x%lx", Addr) << "\n");
// This list may have been updated when we called getSymbolAddress, so
@@ -835,7 +838,12 @@ uint64_t RuntimeDyld::LoadedObjectInfo::getSectionLoadAddress(
return 0;
}
-RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
+void RuntimeDyld::MemoryManager::anchor() {}
+void RuntimeDyld::SymbolResolver::anchor() {}
+
+RuntimeDyld::RuntimeDyld(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : MemMgr(MemMgr), Resolver(Resolver) {
// FIXME: There's a potential issue lurking here if a single instance of
// RuntimeDyld is used to load multiple objects. The current implementation
// associates a single memory manager with a RuntimeDyld instance. Even
@@ -843,7 +851,6 @@ RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
// they share a single memory manager. This can become a problem when page
// permissions are applied.
Dyld = nullptr;
- MM = mm;
ProcessAllSections = false;
Checker = nullptr;
}
@@ -851,27 +858,33 @@ RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
RuntimeDyld::~RuntimeDyld() {}
static std::unique_ptr<RuntimeDyldCOFF>
-createRuntimeDyldCOFF(Triple::ArchType Arch, RTDyldMemoryManager *MM,
+createRuntimeDyldCOFF(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver,
bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) {
- std::unique_ptr<RuntimeDyldCOFF> Dyld(RuntimeDyldCOFF::create(Arch, MM));
+ std::unique_ptr<RuntimeDyldCOFF> Dyld =
+ RuntimeDyldCOFF::create(Arch, MM, Resolver);
Dyld->setProcessAllSections(ProcessAllSections);
Dyld->setRuntimeDyldChecker(Checker);
return Dyld;
}
static std::unique_ptr<RuntimeDyldELF>
-createRuntimeDyldELF(RTDyldMemoryManager *MM, bool ProcessAllSections,
- RuntimeDyldCheckerImpl *Checker) {
- std::unique_ptr<RuntimeDyldELF> Dyld(new RuntimeDyldELF(MM));
+createRuntimeDyldELF(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver,
+ bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) {
+ std::unique_ptr<RuntimeDyldELF> Dyld(new RuntimeDyldELF(MM, Resolver));
Dyld->setProcessAllSections(ProcessAllSections);
Dyld->setRuntimeDyldChecker(Checker);
return Dyld;
}
static std::unique_ptr<RuntimeDyldMachO>
-createRuntimeDyldMachO(Triple::ArchType Arch, RTDyldMemoryManager *MM,
- bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) {
- std::unique_ptr<RuntimeDyldMachO> Dyld(RuntimeDyldMachO::create(Arch, MM));
+createRuntimeDyldMachO(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver,
+ bool ProcessAllSections,
+ RuntimeDyldCheckerImpl *Checker) {
+ std::unique_ptr<RuntimeDyldMachO> Dyld =
+ RuntimeDyldMachO::create(Arch, MM, Resolver);
Dyld->setProcessAllSections(ProcessAllSections);
Dyld->setRuntimeDyldChecker(Checker);
return Dyld;
@@ -881,14 +894,14 @@ std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
RuntimeDyld::loadObject(const ObjectFile &Obj) {
if (!Dyld) {
if (Obj.isELF())
- Dyld = createRuntimeDyldELF(MM, ProcessAllSections, Checker);
+ Dyld = createRuntimeDyldELF(MemMgr, Resolver, ProcessAllSections, Checker);
else if (Obj.isMachO())
Dyld = createRuntimeDyldMachO(
- static_cast<Triple::ArchType>(Obj.getArch()), MM,
+ static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
ProcessAllSections, Checker);
else if (Obj.isCOFF())
Dyld = createRuntimeDyldCOFF(
- static_cast<Triple::ArchType>(Obj.getArch()), MM,
+ static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
ProcessAllSections, Checker);
else
report_fatal_error("Incompatible object format!");
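With the changes above, RuntimeDyld borrows a RuntimeDyld::MemoryManager and a RuntimeDyld::SymbolResolver by reference instead of owning an RTDyldMemoryManager pointer. A minimal usage sketch, assuming the caller owns both objects and keeps them alive for as long as the loaded code is in use:

    #include "llvm/ExecutionEngine/RuntimeDyld.h"
    #include "llvm/Object/ObjectFile.h"
    using namespace llvm;

    // Sketch: load and relocate one object with caller-owned manager/resolver.
    std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
    loadOne(const object::ObjectFile &Obj,
            RuntimeDyld::MemoryManager &MemMgr,
            RuntimeDyld::SymbolResolver &Resolver) {
      RuntimeDyld Dyld(MemMgr, Resolver); // new two-reference constructor
      auto Info = Dyld.loadObject(Obj);
      Dyld.resolveRelocations();
      return Info;
    }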
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 56bcb8e..8055d55 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -40,13 +40,15 @@ public:
namespace llvm {
std::unique_ptr<RuntimeDyldCOFF>
-llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, RTDyldMemoryManager *MM) {
+llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch,
+ RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver) {
switch (Arch) {
default:
llvm_unreachable("Unsupported target for RuntimeDyldCOFF.");
break;
case Triple::x86_64:
- return make_unique<RuntimeDyldCOFFX86_64>(MM);
+ return make_unique<RuntimeDyldCOFFX86_64>(MemMgr, Resolver);
}
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
index 681a3e5..32b8fa2 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
@@ -31,11 +31,15 @@ public:
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
loadObject(const object::ObjectFile &Obj) override;
bool isCompatibleFile(const object::ObjectFile &Obj) const override;
- static std::unique_ptr<RuntimeDyldCOFF> create(Triple::ArchType Arch,
- RTDyldMemoryManager *MM);
+
+ static std::unique_ptr<RuntimeDyldCOFF>
+ create(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver);
protected:
- RuntimeDyldCOFF(RTDyldMemoryManager *MM) : RuntimeDyldImpl(MM) {}
+ RuntimeDyldCOFF(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldImpl(MemMgr, Resolver) {}
uint64_t getSymbolOffset(const SymbolRef &Sym);
};
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index c991408..957571b 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -738,7 +738,7 @@ uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const {
uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const {
if (auto InternalSymbol = getRTDyld().getSymbol(Symbol))
return InternalSymbol.getAddress();
- return getRTDyld().MemMgr->getSymbolAddress(Symbol);
+ return getRTDyld().Resolver.findSymbol(Symbol).getAddress();
}
uint64_t RuntimeDyldCheckerImpl::readMemoryAtAddr(uint64_t SrcAddr,
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
index e8d299a..69d2a7d 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
@@ -19,6 +19,7 @@ class RuntimeDyldCheckerImpl {
friend class RuntimeDyldChecker;
friend class RuntimeDyldImpl;
friend class RuntimeDyldCheckerExprEval;
+ friend class RuntimeDyldELF;
public:
RuntimeDyldCheckerImpl(RuntimeDyld &RTDyld, MCDisassembler *Disassembler,
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 6278170..bbffdfb 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "RuntimeDyldELF.h"
+#include "RuntimeDyldCheckerImpl.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -183,32 +184,30 @@ LoadedELFObjectInfo::getObjectForDebug(const ObjectFile &Obj) const {
namespace llvm {
-RuntimeDyldELF::RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+RuntimeDyldELF::RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldImpl(MemMgr, Resolver), GOTSectionID(0), CurrentGOTIndex(0) {}
RuntimeDyldELF::~RuntimeDyldELF() {}
void RuntimeDyldELF::registerEHFrames() {
- if (!MemMgr)
- return;
for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) {
SID EHFrameSID = UnregisteredEHFrameSections[i];
uint8_t *EHFrameAddr = Sections[EHFrameSID].Address;
uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress;
size_t EHFrameSize = Sections[EHFrameSID].Size;
- MemMgr->registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
+ MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
RegisteredEHFrameSections.push_back(EHFrameSID);
}
UnregisteredEHFrameSections.clear();
}
void RuntimeDyldELF::deregisterEHFrames() {
- if (!MemMgr)
- return;
for (int i = 0, e = RegisteredEHFrameSections.size(); i != e; ++i) {
SID EHFrameSID = RegisteredEHFrameSections[i];
uint8_t *EHFrameAddr = Sections[EHFrameSID].Address;
uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress;
size_t EHFrameSize = Sections[EHFrameSID].Size;
- MemMgr->deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
+ MemMgr.deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
}
RegisteredEHFrameSections.clear();
}
@@ -247,27 +246,16 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
<< format("%p\n", Section.Address + Offset));
break;
}
- case ELF::R_X86_64_GOTPCREL: {
- // findGOTEntry returns the 'G + GOT' part of the relocation calculation
- // based on the load/target address of the GOT (not the current/local addr).
- uint64_t GOTAddr = findGOTEntry(Value, SymOffset);
- uint64_t FinalAddress = Section.LoadAddress + Offset;
- // The processRelocationRef method combines the symbol offset and the addend
- // and in most cases that's what we want. For this relocation type, we need
- // the raw addend, so we subtract the symbol offset to get it.
- int64_t RealOffset = GOTAddr + Addend - SymOffset - FinalAddress;
- assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN);
- int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
- support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset;
- break;
- }
case ELF::R_X86_64_PC32: {
// Get the placeholder value from the generated object since
// a previous relocation attempt may have overwritten the loaded version
support::ulittle32_t::ref Placeholder(
(void *)(Section.ObjAddress + Offset));
uint64_t FinalAddress = Section.LoadAddress + Offset;
- int64_t RealOffset = Placeholder + Value + Addend - FinalAddress;
+ int64_t RealOffset = Value + Addend - FinalAddress;
+ // Don't add the placeholder if this is a stub
+ if (Offset < Section.Size)
+ RealOffset += Placeholder;
assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN);
int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset;
@@ -279,8 +267,10 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
support::ulittle64_t::ref Placeholder(
(void *)(Section.ObjAddress + Offset));
uint64_t FinalAddress = Section.LoadAddress + Offset;
- support::ulittle64_t::ref(Section.Address + Offset) =
- Placeholder + Value + Addend - FinalAddress;
+ int64_t RealOffset = Value + Addend - FinalAddress;
+ if (Offset < Section.Size)
+ RealOffset += Placeholder;
+ support::ulittle64_t::ref(Section.Address + Offset) = RealOffset;
break;
}
}
@@ -1325,16 +1315,18 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
Stubs[Value] = StubOffset;
createStubFunction((uint8_t *)StubAddress);
- // Create a GOT entry for the external function.
- GOTEntries.push_back(Value);
-
- // Make our stub function a relative call to the GOT entry.
- RelocationEntry RE(SectionID, StubOffset + 2, ELF::R_X86_64_GOTPCREL,
- -4);
- addRelocationForSymbol(RE, Value.SymbolName);
-
// Bump our stub offset counter
Section.StubOffset = StubOffset + getMaxStubSize();
+
+ // Allocate a GOT Entry
+ uint64_t GOTOffset = allocateGOTEntries(SectionID, 1);
+
+ // The load of the GOT address has an addend of -4
+ resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4);
+
+ // Fill in the value of the symbol we're targeting into the GOT
+ addRelocationForSymbol(computeGOTOffsetRE(SectionID,GOTOffset,0,ELF::R_X86_64_64),
+ Value.SymbolName);
}
// Make the target call a call into the stub table.
@@ -1345,10 +1337,17 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
Value.Offset);
addRelocationForSection(RE, Value.SectionID);
}
+ } else if (Arch == Triple::x86_64 && RelType == ELF::R_X86_64_GOTPCREL) {
+ uint64_t GOTOffset = allocateGOTEntries(SectionID, 1);
+ resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend);
+
+ // Fill in the value of the symbol we're targeting into the GOT
+ RelocationEntry RE = computeGOTOffsetRE(SectionID, GOTOffset, Value.Offset, ELF::R_X86_64_64);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
} else {
- if (Arch == Triple::x86_64 && RelType == ELF::R_X86_64_GOTPCREL) {
- GOTEntries.push_back(Value);
- }
RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, Value.Offset);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
@@ -1358,22 +1357,6 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
return ++RelI;
}
-void RuntimeDyldELF::updateGOTEntries(StringRef Name, uint64_t Addr) {
-
- SmallVectorImpl<std::pair<SID, GOTRelocations>>::iterator it;
- SmallVectorImpl<std::pair<SID, GOTRelocations>>::iterator end = GOTs.end();
-
- for (it = GOTs.begin(); it != end; ++it) {
- GOTRelocations &GOTEntries = it->second;
- for (int i = 0, e = GOTEntries.size(); i != e; ++i) {
- if (GOTEntries[i].SymbolName != nullptr &&
- GOTEntries[i].SymbolName == Name) {
- GOTEntries[i].Offset = Addr;
- }
- }
- }
-}
-
size_t RuntimeDyldELF::getGOTEntrySize() {
// We don't use the GOT in all of these cases, but it's essentially free
// to put them all here.
@@ -1400,83 +1383,53 @@ size_t RuntimeDyldELF::getGOTEntrySize() {
return Result;
}
-uint64_t RuntimeDyldELF::findGOTEntry(uint64_t LoadAddress, uint64_t Offset) {
-
- const size_t GOTEntrySize = getGOTEntrySize();
-
- SmallVectorImpl<std::pair<SID, GOTRelocations>>::const_iterator it;
- SmallVectorImpl<std::pair<SID, GOTRelocations>>::const_iterator end =
- GOTs.end();
-
- int GOTIndex = -1;
- for (it = GOTs.begin(); it != end; ++it) {
- SID GOTSectionID = it->first;
- const GOTRelocations &GOTEntries = it->second;
-
- // Find the matching entry in our vector.
- uint64_t SymbolOffset = 0;
- for (int i = 0, e = GOTEntries.size(); i != e; ++i) {
- if (!GOTEntries[i].SymbolName) {
- if (getSectionLoadAddress(GOTEntries[i].SectionID) == LoadAddress &&
- GOTEntries[i].Offset == Offset) {
- GOTIndex = i;
- SymbolOffset = GOTEntries[i].Offset;
- break;
- }
- } else {
- // GOT entries for external symbols use the addend as the address when
- // the external symbol has been resolved.
- if (GOTEntries[i].Offset == LoadAddress) {
- GOTIndex = i;
- // Don't use the Addend here. The relocation handler will use it.
- break;
- }
- }
- }
-
- if (GOTIndex != -1) {
- if (GOTEntrySize == sizeof(uint64_t)) {
- uint64_t *LocalGOTAddr = (uint64_t *)getSectionAddress(GOTSectionID);
- // Fill in this entry with the address of the symbol being referenced.
- LocalGOTAddr[GOTIndex] = LoadAddress + SymbolOffset;
- } else {
- uint32_t *LocalGOTAddr = (uint32_t *)getSectionAddress(GOTSectionID);
- // Fill in this entry with the address of the symbol being referenced.
- LocalGOTAddr[GOTIndex] = (uint32_t)(LoadAddress + SymbolOffset);
- }
-
- // Calculate the load address of this entry
- return getSectionLoadAddress(GOTSectionID) + (GOTIndex * GOTEntrySize);
- }
+uint64_t RuntimeDyldELF::allocateGOTEntries(unsigned SectionID, unsigned no)
+{
+  (void)SectionID; // The GOT Section is the same for all sections in the object file
+ if (GOTSectionID == 0) {
+ GOTSectionID = Sections.size();
+ // Reserve a section id. We'll allocate the section later
+ // once we know the total size
+ Sections.push_back(SectionEntry(".got", 0, 0, 0));
}
+ uint64_t StartOffset = CurrentGOTIndex * getGOTEntrySize();
+ CurrentGOTIndex += no;
+ return StartOffset;
+}
- assert(GOTIndex != -1 && "Unable to find requested GOT entry.");
- return 0;
+void RuntimeDyldELF::resolveGOTOffsetRelocation(unsigned SectionID, uint64_t Offset, uint64_t GOTOffset)
+{
+ // Fill in the relative address of the GOT Entry into the stub
+ RelocationEntry GOTRE(SectionID, Offset, ELF::R_X86_64_PC32, GOTOffset);
+ addRelocationForSection(GOTRE, GOTSectionID);
+}
+
+RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(unsigned SectionID, uint64_t GOTOffset, uint64_t SymbolOffset,
+ uint32_t Type)
+{
+  (void)SectionID; // The GOT Section is the same for all sections in the object file
+ return RelocationEntry(GOTSectionID, GOTOffset, Type, SymbolOffset);
}
void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
ObjSectionToIDMap &SectionMap) {
// If necessary, allocate the global offset table
- if (MemMgr) {
- // Allocate the GOT if necessary
- size_t numGOTEntries = GOTEntries.size();
- if (numGOTEntries != 0) {
- // Allocate memory for the section
- unsigned SectionID = Sections.size();
- size_t TotalSize = numGOTEntries * getGOTEntrySize();
- uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, getGOTEntrySize(),
- SectionID, ".got", false);
- if (!Addr)
- report_fatal_error("Unable to allocate memory for GOT!");
-
- GOTs.push_back(std::make_pair(SectionID, GOTEntries));
- Sections.push_back(SectionEntry(".got", Addr, TotalSize, 0));
- // For now, initialize all GOT entries to zero. We'll fill them in as
- // needed when GOT-based relocations are applied.
- memset(Addr, 0, TotalSize);
- }
- } else {
- report_fatal_error("Unable to allocate memory for GOT!");
+ if (GOTSectionID != 0) {
+ // Allocate memory for the section
+ size_t TotalSize = CurrentGOTIndex * getGOTEntrySize();
+ uint8_t *Addr = MemMgr.allocateDataSection(TotalSize, getGOTEntrySize(),
+ GOTSectionID, ".got", false);
+ if (!Addr)
+ report_fatal_error("Unable to allocate memory for GOT!");
+
+ Sections[GOTSectionID] = SectionEntry(".got", Addr, TotalSize, 0);
+
+ if (Checker)
+ Checker->registerSection(Obj.getFileName(), GOTSectionID);
+
+ // For now, initialize all GOT entries to zero. We'll fill them in as
+ // needed when GOT-based relocations are applied.
+ memset(Addr, 0, TotalSize);
}
// Look for and record the EH frame section.
@@ -1490,6 +1443,9 @@ void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
break;
}
}
+
+ GOTSectionID = 0;
+ CurrentGOTIndex = 0;
}
bool RuntimeDyldELF::isCompatibleFile(const object::ObjectFile &Obj) const {
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 71260d0..590d26a 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -80,16 +80,32 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
ObjSectionToIDMap &LocalSections,
RelocationValueRef &Rel);
- uint64_t findGOTEntry(uint64_t LoadAddr, uint64_t Offset);
size_t getGOTEntrySize();
- void updateGOTEntries(StringRef Name, uint64_t Addr) override;
+ SectionEntry &getSection(unsigned SectionID) { return Sections[SectionID]; }
- // Relocation entries for symbols whose position-independent offset is
- // updated in a global offset table.
- typedef SmallVector<RelocationValueRef, 2> GOTRelocations;
- GOTRelocations GOTEntries; // List of entries requiring finalization.
- SmallVector<std::pair<SID, GOTRelocations>, 8> GOTs; // Allocated tables.
+ // Allocate the requested number ('no') of GOT entries for use in the given section.
+ uint64_t allocateGOTEntries(unsigned SectionID, unsigned no);
+
+ // Resolve the relative address of GOTOffset in the section with the given
+ // SectionID and place it at the given Offset.
+ void resolveGOTOffsetRelocation(unsigned SectionID, uint64_t Offset,
+ uint64_t GOTOffset);
+
+ // For a GOT entry referenced from SectionID, compute a relocation entry
+ // that will place the final resolved value in the GOT slot
+ RelocationEntry computeGOTOffsetRE(unsigned SectionID,
+ uint64_t GOTOffset,
+ uint64_t SymbolOffset,
+ unsigned Type);
+
+ // The tentative ID for the GOT section
+ unsigned GOTSectionID;
+
+ // Records the current number of allocated slots in the GOT
+ // (This would be equivalent to GOTEntries.size() were it not for relocations
+ // that consume more than one slot)
+ unsigned CurrentGOTIndex;
// When a module is loaded we save the SectionID of the EH frame section
// in a table until we receive a request to register all unregistered
@@ -98,8 +114,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
SmallVector<SID, 2> RegisteredEHFrameSections;
public:
- RuntimeDyldELF(RTDyldMemoryManager *mm);
- virtual ~RuntimeDyldELF();
+ RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver);
+ ~RuntimeDyldELF() override;
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
loadObject(const object::ObjectFile &O) override;
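The three GOT helpers declared above replace the old findGOTEntry/updateGOTEntries flow with a two-phase scheme: slots are only counted while relocations are processed (allocateGOTEntries bumps CurrentGOTIndex), and the backing .got memory is allocated once in finalizeLoad(), sized from that count. The following is a minimal, self-contained sketch of that bookkeeping pattern; GOTModel, kEntrySize and the addresses used are illustrative stand-ins, not part of the patch.

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

constexpr unsigned kEntrySize = 8; // x86-64 GOT entry size, for illustration

// Toy model of the reserve-now, allocate-at-finalize GOT bookkeeping.
struct GOTModel {
  unsigned CurrentGOTIndex = 0;
  std::vector<uint8_t> Storage; // allocated lazily in finalize()

  // Mirrors allocateGOTEntries(): reserve `no` slots, return their byte offset.
  uint64_t allocateGOTEntries(unsigned no) {
    uint64_t StartOffset = CurrentGOTIndex * uint64_t(kEntrySize);
    CurrentGOTIndex += no;
    return StartOffset;
  }

  // Mirrors finalizeLoad(): a single allocation sized from the running count,
  // zero-initialized like the memset in the patch above.
  void finalize() { Storage.assign(CurrentGOTIndex * kEntrySize, 0); }

  // Mirrors the relocation queued by computeGOTOffsetRE(): write the resolved
  // symbol address into the reserved slot once it is known.
  void fillEntry(uint64_t GOTOffset, uint64_t SymbolAddr) {
    std::memcpy(Storage.data() + GOTOffset, &SymbolAddr, kEntrySize);
  }
};

int main() {
  GOTModel GOT;
  uint64_t SlotA = GOT.allocateGOTEntries(1); // e.g. for one external symbol
  uint64_t SlotB = GOT.allocateGOTEntries(1);
  GOT.finalize();
  GOT.fillEntry(SlotA, 0x1000); // addresses are made up for the example
  GOT.fillEntry(SlotB, 0x2000);
  std::cout << "GOT size: " << GOT.Storage.size() << " bytes, slots at "
            << SlotA << " and " << SlotB << "\n";
}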
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 05060dd..ee51a75 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/ExecutionEngine/RuntimeDyldChecker.h"
#include "llvm/Object/ObjectFile.h"
@@ -51,7 +52,7 @@ class Twine;
class SectionEntry {
public:
/// Name - section name.
- StringRef Name;
+ std::string Name;
/// Address - address in the linker's memory where the section resides.
uint8_t *Address;
@@ -188,7 +189,10 @@ class RuntimeDyldImpl {
friend class RuntimeDyldCheckerImpl;
protected:
// The MemoryManager to load objects into.
- RTDyldMemoryManager *MemMgr;
+ RuntimeDyld::MemoryManager &MemMgr;
+
+ // The symbol resolver to use for external symbols.
+ RuntimeDyld::SymbolResolver &Resolver;
// Attached RuntimeDyldChecker instance. Null if no instance attached.
RuntimeDyldCheckerImpl *Checker;
@@ -357,10 +361,6 @@ protected:
/// \brief Resolve relocations to external symbols.
void resolveExternalSymbols();
- /// \brief Update GOT entries for external symbols.
- // The base class does nothing. ELF overrides this.
- virtual void updateGOTEntries(StringRef Name, uint64_t Addr) {}
-
// \brief Compute an upper bound of the memory that is required to load all
// sections
void computeTotalAllocSize(const ObjectFile &Obj, uint64_t &CodeSize,
@@ -374,8 +374,10 @@ protected:
std::pair<unsigned, unsigned> loadObjectImpl(const object::ObjectFile &Obj);
public:
- RuntimeDyldImpl(RTDyldMemoryManager *mm)
- : MemMgr(mm), Checker(nullptr), ProcessAllSections(false), HasError(false) {
+ RuntimeDyldImpl(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : MemMgr(MemMgr), Resolver(Resolver), Checker(nullptr),
+ ProcessAllSections(false), HasError(false) {
}
virtual ~RuntimeDyldImpl();
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 2d39662..675063c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -178,25 +178,30 @@ bool RuntimeDyldMachO::isCompatibleFile(const object::ObjectFile &Obj) const {
}
template <typename Impl>
-void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &ObjImg,
+void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &Obj,
ObjSectionToIDMap &SectionMap) {
unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID;
unsigned TextSID = RTDYLD_INVALID_SECTION_ID;
unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID;
- ObjSectionToIDMap::iterator i, e;
- for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) {
- const SectionRef &Section = i->first;
+ for (const auto &Section : Obj.sections()) {
StringRef Name;
Section.getName(Name);
- if (Name == "__eh_frame")
- EHFrameSID = i->second;
- else if (Name == "__text")
- TextSID = i->second;
+
+ // Force emission of the __text, __eh_frame, and __gcc_except_tab sections
+ // if they're present. Otherwise call down to the impl to handle other
+ // sections that have already been emitted.
+ if (Name == "__text")
+ TextSID = findOrEmitSection(Obj, Section, true, SectionMap);
+ else if (Name == "__eh_frame")
+ EHFrameSID = findOrEmitSection(Obj, Section, false, SectionMap);
else if (Name == "__gcc_except_tab")
- ExceptTabSID = i->second;
- else
- impl().finalizeSection(ObjImg, i->second, Section);
+ ExceptTabSID = findOrEmitSection(Obj, Section, true, SectionMap);
+ else {
+ auto I = SectionMap.find(Section);
+ if (I != SectionMap.end())
+ impl().finalizeSection(Obj, I->second, Section);
+ }
}
UnregisteredEHFrameSections.push_back(
EHFrameRelatedSections(EHFrameSID, TextSID, ExceptTabSID));
@@ -239,7 +244,8 @@ unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(unsigned char *P,
}
static int64_t computeDelta(SectionEntry *A, SectionEntry *B) {
- int64_t ObjDistance = A->ObjAddress - B->ObjAddress;
+ int64_t ObjDistance =
+ static_cast<int64_t>(A->ObjAddress) - static_cast<int64_t>(B->ObjAddress);
int64_t MemDistance = A->LoadAddress - B->LoadAddress;
return ObjDistance - MemDistance;
}
@@ -247,8 +253,6 @@ static int64_t computeDelta(SectionEntry *A, SectionEntry *B) {
template <typename Impl>
void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() {
- if (!MemMgr)
- return;
for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) {
EHFrameRelatedSections &SectionInfo = UnregisteredEHFrameSections[i];
if (SectionInfo.EHFrameSID == RTDYLD_INVALID_SECTION_ID ||
@@ -271,22 +275,28 @@ void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() {
P = processFDE(P, DeltaForText, DeltaForEH);
} while (P != End);
- MemMgr->registerEHFrames(EHFrame->Address, EHFrame->LoadAddress,
- EHFrame->Size);
+ MemMgr.registerEHFrames(EHFrame->Address, EHFrame->LoadAddress,
+ EHFrame->Size);
}
UnregisteredEHFrameSections.clear();
}
std::unique_ptr<RuntimeDyldMachO>
-RuntimeDyldMachO::create(Triple::ArchType Arch, RTDyldMemoryManager *MM) {
+RuntimeDyldMachO::create(Triple::ArchType Arch,
+ RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver) {
switch (Arch) {
default:
llvm_unreachable("Unsupported target for RuntimeDyldMachO.");
break;
- case Triple::arm: return make_unique<RuntimeDyldMachOARM>(MM);
- case Triple::aarch64: return make_unique<RuntimeDyldMachOAArch64>(MM);
- case Triple::x86: return make_unique<RuntimeDyldMachOI386>(MM);
- case Triple::x86_64: return make_unique<RuntimeDyldMachOX86_64>(MM);
+ case Triple::arm:
+ return make_unique<RuntimeDyldMachOARM>(MemMgr, Resolver);
+ case Triple::aarch64:
+ return make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver);
+ case Triple::x86:
+ return make_unique<RuntimeDyldMachOI386>(MemMgr, Resolver);
+ case Triple::x86_64:
+ return make_unique<RuntimeDyldMachOX86_64>(MemMgr, Resolver);
}
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index f8bfc03..45a94ba 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -49,7 +49,9 @@ protected:
// EH frame sections with the memory manager.
SmallVector<EHFrameRelatedSections, 2> UnregisteredEHFrameSections;
- RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+ RuntimeDyldMachO(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldImpl(MemMgr, Resolver) {}
/// This convenience method uses memcpy to extract a contiguous addend (the
/// addend size and offset are taken from the corresponding fields of the RE).
@@ -114,8 +116,10 @@ protected:
public:
/// Create a RuntimeDyldMachO instance for the given target architecture.
- static std::unique_ptr<RuntimeDyldMachO> create(Triple::ArchType Arch,
- RTDyldMemoryManager *mm);
+ static std::unique_ptr<RuntimeDyldMachO>
+ create(Triple::ArchType Arch,
+ RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver);
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
loadObject(const object::ObjectFile &O) override;
@@ -142,7 +146,9 @@ private:
int64_t DeltaForEH);
public:
- RuntimeDyldMachOCRTPBase(RTDyldMemoryManager *mm) : RuntimeDyldMachO(mm) {}
+ RuntimeDyldMachOCRTPBase(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldMachO(MemMgr, Resolver) {}
void finalizeLoad(const ObjectFile &Obj,
ObjSectionToIDMap &SectionMap) override;
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
index ce2f4a2..cd534a1 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -32,7 +32,9 @@ private:
SmallVector<SID, 2> RegisteredEHFrameSections;
public:
- RuntimeDyldCOFFX86_64(RTDyldMemoryManager *MM) : RuntimeDyldCOFF(MM) {}
+ RuntimeDyldCOFFX86_64(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldCOFF(MM, Resolver) {}
unsigned getMaxStubSize() override {
return 6; // 2-byte jmp instruction + 32-bit relative address
@@ -177,13 +179,11 @@ public:
unsigned getStubAlignment() override { return 1; }
void registerEHFrames() override {
- if (!MemMgr)
- return;
for (auto const &EHFrameSID : UnregisteredEHFrameSections) {
uint8_t *EHFrameAddr = Sections[EHFrameSID].Address;
uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress;
size_t EHFrameSize = Sections[EHFrameSID].Size;
- MemMgr->registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
+ MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
RegisteredEHFrameSections.push_back(EHFrameSID);
}
UnregisteredEHFrameSections.clear();
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
index 196fa62..99fd6e3 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
@@ -23,8 +23,9 @@ public:
typedef uint64_t TargetPtrT;
- RuntimeDyldMachOAArch64(RTDyldMemoryManager *MM)
- : RuntimeDyldMachOCRTPBase(MM) {}
+ RuntimeDyldMachOAArch64(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldMachOCRTPBase(MM, Resolver) {}
unsigned getMaxStubSize() override { return 8; }
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index 09e430e..09e51f2 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -25,7 +25,9 @@ public:
typedef uint32_t TargetPtrT;
- RuntimeDyldMachOARM(RTDyldMemoryManager *MM) : RuntimeDyldMachOCRTPBase(MM) {}
+ RuntimeDyldMachOARM(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldMachOCRTPBase(MM, Resolver) {}
unsigned getMaxStubSize() override { return 8; }
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index 67d7027..053f90c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -22,8 +22,9 @@ public:
typedef uint32_t TargetPtrT;
- RuntimeDyldMachOI386(RTDyldMemoryManager *MM)
- : RuntimeDyldMachOCRTPBase(MM) {}
+ RuntimeDyldMachOI386(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldMachOCRTPBase(MM, Resolver) {}
unsigned getMaxStubSize() override { return 0; }
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
index 0734017..4b3b01b 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -22,8 +22,9 @@ public:
typedef uint64_t TargetPtrT;
- RuntimeDyldMachOX86_64(RTDyldMemoryManager *MM)
- : RuntimeDyldMachOCRTPBase(MM) {}
+ RuntimeDyldMachOX86_64(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldMachOCRTPBase(MM, Resolver) {}
unsigned getMaxStubSize() override { return 8; }
diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt
index 81e51d1..bfd87ec 100644
--- a/lib/Fuzzer/CMakeLists.txt
+++ b/lib/Fuzzer/CMakeLists.txt
@@ -1,8 +1,10 @@
-# Disable the coverage instrumentation for the fuzzer itself.
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2 -fsanitize-coverage=0")
-if( LLVM_USE_SANITIZE_COVERAGE )
+set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS_RELEASE}")
+# Disable the coverage and sanitizer instrumentation for the fuzzer itself.
+set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O2 -fno-sanitize=all")
+if( LLVM_USE_SANITIZE_COVERAGE )
add_library(LLVMFuzzerNoMain OBJECT
FuzzerCrossOver.cpp
+ FuzzerDFSan.cpp
FuzzerDriver.cpp
FuzzerIO.cpp
FuzzerLoop.cpp
diff --git a/lib/Fuzzer/FuzzerDFSan.cpp b/lib/Fuzzer/FuzzerDFSan.cpp
new file mode 100644
index 0000000..16f8c0f
--- /dev/null
+++ b/lib/Fuzzer/FuzzerDFSan.cpp
@@ -0,0 +1,275 @@
+//===- FuzzerDFSan.cpp - DFSan-based fuzzer mutator -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// DataFlowSanitizer (DFSan) is a tool for
+// generalised dynamic data flow (taint) analysis:
+// http://clang.llvm.org/docs/DataFlowSanitizer.html .
+//
+// This file implements a mutation algorithm based on taint
+// analysis feedback from DFSan.
+//
+// The approach has some similarity to "Taint-based Directed Whitebox Fuzzing"
+// by Vijay Ganesh & Tim Leek & Martin Rinard:
+// http://dspace.mit.edu/openaccess-disseminate/1721.1/59320,
+// but it uses a full blown LLVM IR taint analysis and separate instrumentation
+// to analyze all of the "attack points" at once.
+//
+// Workflow:
+// * lib/Fuzzer/Fuzzer*.cpp is compiled w/o any instrumentation.
+// * The code under test is compiled with DFSan *and* with special extra hooks
+// that are inserted before dfsan. Currently supported hooks:
+// - __sanitizer_cov_trace_cmp: inserted before every ICMP instruction,
+// receives the type, size and arguments of ICMP.
+// * Every call to HOOK(a,b) is replaced by DFSan with
+// __dfsw_HOOK(a, b, label(a), label(b)) so that __dfsw_HOOK
+// gets all the taint labels for the arguments.
+// * At the Fuzzer startup we assign a unique DFSan label
+// to every byte of the input string (Fuzzer::CurrentUnit) so that for any
+// chunk of data we know which input bytes it has derived from.
+// * The __dfsw_* functions (implemented in this file) record the
+// parameters (i.e. the application data and the corresponding taint labels)
+// in a global state.
+// * Fuzzer::MutateWithDFSan() tries to use the data recorded by __dfsw_*
+// hooks to guide the fuzzing towards new application states.
+// For example if 4 bytes of data that derive from input bytes {4,5,6,7}
+// are compared with a constant 12345 and the comparison always yields
+// the same result, we try to insert 12345, 12344, 12346 into bytes
+// {4,5,6,7} of the next fuzzed inputs.
+//
+// This code does not function when DFSan is not linked in.
+// Instead of using ifdefs and thus requiring a separate build of lib/Fuzzer
+// we redeclare the dfsan_* interface functions as weak and check if they
+// are nullptr before calling.
+// If this approach proves to be useful we may add attribute(weak) to the
+// dfsan declarations in dfsan_interface.h
+//
+// This module is in the "proof of concept" stage.
+// It is capable of solving only the simplest puzzles
+// like test/dfsan/DFSanSimpleCmpTest.cpp.
+//===----------------------------------------------------------------------===//
+
+/* Example of manual usage:
+(
+ cd $LLVM/lib/Fuzzer/
+ clang -fPIC -c -g -O2 -std=c++11 Fuzzer*.cpp
+ clang++ -O0 -std=c++11 -fsanitize-coverage=3 \
+ -mllvm -sanitizer-coverage-experimental-trace-compares=1 \
+ -fsanitize=dataflow -fsanitize-blacklist=./dfsan_fuzzer_abi.list \
+ test/dfsan/DFSanSimpleCmpTest.cpp Fuzzer*.o
+ ./a.out
+)
+*/
+
+#include "FuzzerInternal.h"
+#include <sanitizer/dfsan_interface.h>
+
+#include <cstring>
+#include <iostream>
+#include <unordered_map>
+
+extern "C" {
+__attribute__((weak))
+dfsan_label dfsan_create_label(const char *desc, void *userdata);
+__attribute__((weak))
+void dfsan_set_label(dfsan_label label, void *addr, size_t size);
+__attribute__((weak))
+void dfsan_add_label(dfsan_label label, void *addr, size_t size);
+__attribute__((weak))
+const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label);
+} // extern "C"
+
+namespace {
+
+// These values are copied from include/llvm/IR/InstrTypes.h.
+// We do not include the LLVM headers here to remain independent.
+// If these values ever change, an assertion in ComputeCmp will fail.
+enum Predicate {
+ ICMP_EQ = 32, ///< equal
+ ICMP_NE = 33, ///< not equal
+ ICMP_UGT = 34, ///< unsigned greater than
+ ICMP_UGE = 35, ///< unsigned greater or equal
+ ICMP_ULT = 36, ///< unsigned less than
+ ICMP_ULE = 37, ///< unsigned less or equal
+ ICMP_SGT = 38, ///< signed greater than
+ ICMP_SGE = 39, ///< signed greater or equal
+ ICMP_SLT = 40, ///< signed less than
+ ICMP_SLE = 41, ///< signed less or equal
+};
+
+template <class U, class S>
+bool ComputeCmp(size_t CmpType, U Arg1, U Arg2) {
+ switch(CmpType) {
+ case ICMP_EQ : return Arg1 == Arg2;
+ case ICMP_NE : return Arg1 != Arg2;
+ case ICMP_UGT: return Arg1 > Arg2;
+ case ICMP_UGE: return Arg1 >= Arg2;
+ case ICMP_ULT: return Arg1 < Arg2;
+ case ICMP_ULE: return Arg1 <= Arg2;
+ case ICMP_SGT: return (S)Arg1 > (S)Arg2;
+ case ICMP_SGE: return (S)Arg1 >= (S)Arg2;
+ case ICMP_SLT: return (S)Arg1 < (S)Arg2;
+ case ICMP_SLE: return (S)Arg1 <= (S)Arg2;
+ default: assert(0 && "unsupported CmpType");
+ }
+ return false;
+}
+
+static bool ComputeCmp(size_t CmpSize, size_t CmpType, uint64_t Arg1,
+ uint64_t Arg2) {
+ if (CmpSize == 8) return ComputeCmp<uint64_t, int64_t>(CmpType, Arg1, Arg2);
+ if (CmpSize == 4) return ComputeCmp<uint32_t, int32_t>(CmpType, Arg1, Arg2);
+ if (CmpSize == 2) return ComputeCmp<uint16_t, int16_t>(CmpType, Arg1, Arg2);
+ if (CmpSize == 1) return ComputeCmp<uint8_t, int8_t>(CmpType, Arg1, Arg2);
+ assert(0 && "unsupported type size");
+ return true;
+}
+
+// As a simplification we use the range of input bytes instead of a set of input
+// bytes.
+struct LabelRange {
+ uint16_t Beg, End; // Range is [Beg, End), thus Beg==End is an empty range.
+
+ LabelRange(uint16_t Beg = 0, uint16_t End = 0) : Beg(Beg), End(End) {}
+
+ static LabelRange Join(LabelRange LR1, LabelRange LR2) {
+ if (LR1.Beg == LR1.End) return LR2;
+ if (LR2.Beg == LR2.End) return LR1;
+ return {std::min(LR1.Beg, LR2.Beg), std::max(LR1.End, LR2.End)};
+ }
+ LabelRange &Join(LabelRange LR) {
+ return *this = Join(*this, LR);
+ }
+ static LabelRange Singleton(const dfsan_label_info *LI) {
+ uint16_t Idx = (uint16_t)(uintptr_t)LI->userdata;
+ assert(Idx > 0);
+ return {(uint16_t)(Idx - 1), Idx};
+ }
+};
+
+std::ostream &operator<<(std::ostream &os, const LabelRange &LR) {
+ return os << "[" << LR.Beg << "," << LR.End << ")";
+}
+
+class DFSanState {
+ public:
+ DFSanState(const fuzzer::Fuzzer::FuzzingOptions &Options)
+ : Options(Options) {}
+
+ struct CmpSiteInfo {
+ size_t ResCounters[2] = {0, 0};
+ size_t CmpSize = 0;
+ LabelRange LR;
+ std::unordered_map<uint64_t, size_t> CountedConstants;
+ };
+
+ LabelRange GetLabelRange(dfsan_label L);
+ void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
+ uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
+ dfsan_label L2);
+ bool Mutate(fuzzer::Unit *U);
+
+ private:
+ std::unordered_map<uintptr_t, CmpSiteInfo> PcToCmpSiteInfoMap;
+ LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)] = {};
+ const fuzzer::Fuzzer::FuzzingOptions &Options;
+};
+
+LabelRange DFSanState::GetLabelRange(dfsan_label L) {
+ LabelRange &LR = LabelRanges[L];
+ if (LR.Beg < LR.End || L == 0)
+ return LR;
+ const dfsan_label_info *LI = dfsan_get_label_info(L);
+ if (LI->l1 || LI->l2)
+ return LR = LabelRange::Join(GetLabelRange(LI->l1), GetLabelRange(LI->l2));
+ return LR = LabelRange::Singleton(LI);
+}
+
+void DFSanState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
+ uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
+ dfsan_label L2) {
+ if (L1 == 0 && L2 == 0)
+ return; // Not actionable.
+ if (L1 != 0 && L2 != 0)
+ return; // Probably still actionable.
+ bool Res = ComputeCmp(CmpSize, CmpType, Arg1, Arg2);
+ CmpSiteInfo &CSI = PcToCmpSiteInfoMap[PC];
+ CSI.CmpSize = CmpSize;
+ CSI.LR.Join(GetLabelRange(L1)).Join(GetLabelRange(L2));
+ if (!L1) CSI.CountedConstants[Arg1]++;
+ if (!L2) CSI.CountedConstants[Arg2]++;
+ size_t Counter = CSI.ResCounters[Res]++;
+
+ if (Options.Verbosity >= 2 &&
+ (Counter & (Counter - 1)) == 0 &&
+ CSI.ResCounters[!Res] == 0)
+ std::cerr << "DFSAN:"
+ << " PC " << std::hex << PC << std::dec
+ << " S " << CmpSize
+ << " T " << CmpType
+ << " A1 " << Arg1 << " A2 " << Arg2 << " R " << Res
+ << " L" << L1 << GetLabelRange(L1)
+ << " L" << L2 << GetLabelRange(L2)
+ << " LR " << CSI.LR
+ << "\n";
+}
+
+bool DFSanState::Mutate(fuzzer::Unit *U) {
+ for (auto &PCToCmp : PcToCmpSiteInfoMap) {
+ auto &CSI = PCToCmp.second;
+ if (CSI.ResCounters[0] * CSI.ResCounters[1] != 0) continue;
+ if (CSI.ResCounters[0] + CSI.ResCounters[1] < 1000) continue;
+ if (CSI.CountedConstants.size() != 1) continue;
+ uintptr_t C = CSI.CountedConstants.begin()->first;
+ if (U->size() >= CSI.CmpSize) {
+ size_t RangeSize = CSI.LR.End - CSI.LR.Beg;
+ size_t Idx = CSI.LR.Beg + rand() % RangeSize;
+ if (Idx + CSI.CmpSize > U->size()) continue;
+ C += rand() % 5 - 2;
+ memcpy(U->data() + Idx, &C, CSI.CmpSize);
+ return true;
+ }
+ }
+ return false;
+}
+
+static DFSanState *DFSan;
+
+} // namespace
+
+namespace fuzzer {
+
+bool Fuzzer::MutateWithDFSan(Unit *U) {
+ if (!&dfsan_create_label || !DFSan) return false;
+ return DFSan->Mutate(U);
+}
+
+void Fuzzer::InitializeDFSan() {
+ if (!&dfsan_create_label || !Options.UseDFSan) return;
+ DFSan = new DFSanState(Options);
+ CurrentUnit.resize(Options.MaxLen);
+ for (size_t i = 0; i < static_cast<size_t>(Options.MaxLen); i++) {
+ dfsan_label L = dfsan_create_label("input", (void*)(i + 1));
+ // We assume that no one else has called dfsan_create_label before.
+ assert(L == i + 1);
+ dfsan_set_label(L, &CurrentUnit[i], 1);
+ }
+}
+
+} // namespace fuzzer
+
+extern "C" {
+void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
+ uint64_t Arg2, dfsan_label L0,
+ dfsan_label L1, dfsan_label L2) {
+ assert(L0 == 0);
+ uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+ uint64_t CmpSize = (SizeAndType >> 32) / 8;
+ uint64_t Type = (SizeAndType << 32) >> 32;
+ DFSan->DFSanCmpCallback(PC, CmpSize, Type, Arg1, Arg2, L1, L2);
+}
+} // extern "C"
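The header comment above describes the core mutation: when the recorded taint labels show that a fixed range of input bytes always feeds a comparison against a constant, the next inputs get that constant (plus a small wobble) written into those bytes. Below is a standalone toy model of just that step, with no DFSan linked in; the byte range and constant are invented for the example, and the memcpy relies on a little-endian layout, as DFSanState::Mutate effectively does.

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Write `Constant` over the input bytes that taint tracking blamed for a
// never-satisfied comparison (a simplified stand-in for DFSanState::Mutate).
static bool MutateTowardConstant(std::vector<uint8_t> &U, size_t Beg,
                                 size_t Size, uint64_t Constant) {
  if (Beg + Size > U.size()) return false;
  std::memcpy(U.data() + Beg, &Constant, Size); // little-endian assumption
  return true;
}

int main() {
  // Suppose bytes {4,5,6,7} were observed (via labels) to feed a 4-byte
  // compare against 12345 that always fails.
  std::vector<uint8_t> Unit(16, 'a');
  if (MutateTowardConstant(Unit, /*Beg=*/4, /*Size=*/4, /*Constant=*/12345))
    std::cout << "byte 4 is now " << int(Unit[4]) << " (low byte of 12345)\n";
}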
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index 9ccd744..05a699e 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -18,6 +18,10 @@
#include <thread>
#include <atomic>
#include <mutex>
+#include <string>
+#include <sstream>
+#include <algorithm>
+#include <iterator>
namespace fuzzer {
@@ -26,19 +30,26 @@ struct FlagDescription {
const char *Name;
const char *Description;
int Default;
- int *Flag;
+ int *IntFlag;
+ const char **StrFlag;
};
struct {
-#define FUZZER_FLAG(Type, Name, Default, Description) Type Name;
+#define FUZZER_FLAG_INT(Name, Default, Description) int Name;
+#define FUZZER_FLAG_STRING(Name, Description) const char *Name;
#include "FuzzerFlags.def"
-#undef FUZZER_FLAG
+#undef FUZZER_FLAG_INT
+#undef FUZZER_FLAG_STRING
} Flags;
static FlagDescription FlagDescriptions [] {
-#define FUZZER_FLAG(Type, Name, Default, Description) {#Name, Description, Default, &Flags.Name},
+#define FUZZER_FLAG_INT(Name, Default, Description) \
+ { #Name, Description, Default, &Flags.Name, nullptr},
+#define FUZZER_FLAG_STRING(Name, Description) \
+ { #Name, Description, 0, nullptr, &Flags.Name },
#include "FuzzerFlags.def"
-#undef FUZZER_FLAG
+#undef FUZZER_FLAG_INT
+#undef FUZZER_FLAG_STRING
};
static const size_t kNumFlags =
@@ -79,11 +90,18 @@ static bool ParseOneFlag(const char *Param) {
const char *Name = FlagDescriptions[F].Name;
const char *Str = FlagValue(Param, Name);
if (Str) {
- int Val = std::stol(Str);
- *FlagDescriptions[F].Flag = Val;
- if (Flags.verbosity >= 2)
- std::cerr << "Flag: " << Name << " " << Val << "\n";
- return true;
+ if (FlagDescriptions[F].IntFlag) {
+ int Val = std::stol(Str);
+ *FlagDescriptions[F].IntFlag = Val;
+ if (Flags.verbosity >= 2)
+ std::cerr << "Flag: " << Name << " " << Val << "\n";
+ return true;
+ } else if (FlagDescriptions[F].StrFlag) {
+ *FlagDescriptions[F].StrFlag = Str;
+ if (Flags.verbosity >= 2)
+ std::cerr << "Flag: " << Name << " " << Str << "\n";
+ return true;
+ }
}
}
PrintHelp();
@@ -92,8 +110,12 @@ static bool ParseOneFlag(const char *Param) {
// We don't use any library to minimize dependencies.
static void ParseFlags(int argc, char **argv) {
- for (size_t F = 0; F < kNumFlags; F++)
- *FlagDescriptions[F].Flag = FlagDescriptions[F].Default;
+ for (size_t F = 0; F < kNumFlags; F++) {
+ if (FlagDescriptions[F].IntFlag)
+ *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default;
+ if (FlagDescriptions[F].StrFlag)
+ *FlagDescriptions[F].StrFlag = nullptr;
+ }
for (int A = 1; A < argc; A++) {
if (ParseOneFlag(argv[A])) continue;
inputs.push_back(argv[A]);
@@ -139,6 +161,26 @@ static int RunInMultipleProcesses(int argc, char **argv, int NumWorkers,
return HasErrors ? 1 : 0;
}
+std::vector<std::string> ReadTokensFile(const char *TokensFilePath) {
+ if (!TokensFilePath) return {};
+ std::string TokensFileContents = FileToString(TokensFilePath);
+ std::istringstream ISS(TokensFileContents);
+ std::vector<std::string> Res = {std::istream_iterator<std::string>{ISS},
+ std::istream_iterator<std::string>{}};
+ Res.push_back(" ");
+ Res.push_back("\t");
+ Res.push_back("\n");
+ return Res;
+}
+
+int ApplyTokens(const Fuzzer &F, const char *InputFilePath) {
+ Unit U = FileToVector(InputFilePath);
+ auto T = F.SubstituteTokens(U);
+ T.push_back(0);
+ std::cout << T.data();
+ return 0;
+}
+
int FuzzerDriver(int argc, char **argv, UserCallback Callback) {
using namespace fuzzer;
@@ -161,8 +203,10 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) {
Options.UseCounters = Flags.use_counters;
Options.UseFullCoverageSet = Flags.use_full_coverage_set;
Options.UseCoveragePairs = Flags.use_coverage_pairs;
+ Options.UseDFSan = Flags.dfsan;
Options.PreferSmallDuringInitialShuffle =
Flags.prefer_small_during_initial_shuffle;
+ Options.Tokens = ReadTokensFile(Flags.tokens);
if (Flags.runs >= 0)
Options.MaxNumberOfRuns = Flags.runs;
if (!inputs.empty())
@@ -181,6 +225,16 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) {
if (Flags.timeout > 0)
SetTimer(Flags.timeout);
+ if (Flags.verbosity >= 2) {
+ std::cerr << "Tokens: {";
+ for (auto &T : Options.Tokens)
+ std::cerr << T << ",";
+ std::cerr << "}\n";
+ }
+
+ if (Flags.apply_tokens)
+ return ApplyTokens(F, Flags.apply_tokens);
+
for (auto &inp : inputs)
F.ReadDir(inp);
diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def
index 08176af..dbaf75d 100644
--- a/lib/Fuzzer/FuzzerFlags.def
+++ b/lib/Fuzzer/FuzzerFlags.def
@@ -6,41 +6,48 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-// Flags. FUZZER_FLAG macro should be defined at the point of inclusion.
-// We are not using any flag parsing library for better portability and
-// independence.
+// Flags. FUZZER_FLAG_INT/FUZZER_FLAG_STRING macros should be defined at the
+// point of inclusion. We are not using any flag parsing library for better
+// portability and independence.
//===----------------------------------------------------------------------===//
-FUZZER_FLAG(int, verbosity, 1, "Verbosity level.")
-FUZZER_FLAG(int, seed, 0, "Random seed. If 0, seed is generated.")
-FUZZER_FLAG(int, iterations, -1,
+FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.")
+FUZZER_FLAG_INT(seed, 0, "Random seed. If 0, seed is generated.")
+FUZZER_FLAG_INT(iterations, -1,
"Number of iterations of the fuzzer internal loop"
" (-1 for infinite iterations).")
-FUZZER_FLAG(int, runs, -1,
+FUZZER_FLAG_INT(runs, -1,
"Number of individual test runs (-1 for infinite runs).")
-FUZZER_FLAG(int, max_len, 64, "Maximal length of the test input.")
-FUZZER_FLAG(int, cross_over, 1, "If 1, cross over inputs.")
-FUZZER_FLAG(int, mutate_depth, 5,
+FUZZER_FLAG_INT(max_len, 64, "Maximal length of the test input.")
+FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.")
+FUZZER_FLAG_INT(mutate_depth, 5,
"Apply this number of consecutive mutations to each input.")
-FUZZER_FLAG(
- int, prefer_small_during_initial_shuffle, -1,
+FUZZER_FLAG_INT(
+ prefer_small_during_initial_shuffle, -1,
"If 1, always prefer smaller inputs during the initial corpus shuffle."
" If 0, never do that. If -1, do it sometimes.")
-FUZZER_FLAG(int, exit_on_first, 0,
+FUZZER_FLAG_INT(exit_on_first, 0,
"If 1, exit after the first new interesting input is found.")
-FUZZER_FLAG(int, timeout, -1, "Timeout in seconds (if positive).")
-FUZZER_FLAG(int, help, 0, "Print help.")
-FUZZER_FLAG(
- int, save_minimized_corpus, 0,
+FUZZER_FLAG_INT(timeout, -1, "Timeout in seconds (if positive).")
+FUZZER_FLAG_INT(help, 0, "Print help.")
+FUZZER_FLAG_INT(
+ save_minimized_corpus, 0,
"If 1, the minimized corpus is saved into the first input directory")
-FUZZER_FLAG(int, use_counters, 0, "Use coverage counters")
-FUZZER_FLAG(int, use_full_coverage_set, 0,
+FUZZER_FLAG_INT(use_counters, 0, "Use coverage counters")
+FUZZER_FLAG_INT(use_full_coverage_set, 0,
"Experimental: Maximize the number of different full"
" coverage sets as opposed to maximizing the total coverage."
" This is potentially MUCH slower, but may discover more paths.")
-FUZZER_FLAG(int, use_coverage_pairs, 0,
+FUZZER_FLAG_INT(use_coverage_pairs, 0,
"Experimental: Maximize the number of different coverage pairs.")
-FUZZER_FLAG(int, jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn"
+FUZZER_FLAG_INT(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn"
" this number of jobs in separate worker processes"
" with stdout/stderr redirected to fuzz-JOB.log.")
-FUZZER_FLAG(int, workers, 0,
+FUZZER_FLAG_INT(workers, 0,
"Number of simultaneous worker processes to run the jobs.")
+FUZZER_FLAG_INT(dfsan, 1, "Use DFSan for taint-guided mutations. No-op unless "
+ "the DFSan instrumentation was compiled in.")
+
+FUZZER_FLAG_STRING(tokens, "Use the file with tokens (one token per line) to"
+                            " fuzz a token-based input language.")
+FUZZER_FLAG_STRING(apply_tokens, "Read the given input file, substitute bytes"
+                            " with tokens and write the result to stdout.")
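The header of this file states that FUZZER_FLAG_INT/FUZZER_FLAG_STRING must be defined at each point of inclusion; that is the X-macro pattern FuzzerDriver.cpp relies on above. A minimal, self-contained demonstration of the same technique follows; the flag list is inlined here (and its names are made up) instead of living in a separate .def file.

#include <iostream>

// One list of flags, expanded twice with different macro definitions.
#define MY_FLAGS(X_INT, X_STR)            \
  X_INT(verbosity, 1, "Verbosity level.") \
  X_STR(tokens, "Path to a token file.")

// Expansion #1: declare storage for every flag.
struct {
#define FLAG_INT(Name, Default, Desc) int Name;
#define FLAG_STR(Name, Desc) const char *Name;
  MY_FLAGS(FLAG_INT, FLAG_STR)
#undef FLAG_INT
#undef FLAG_STR
} Flags;

// Expansion #2: build a description table from the same list.
struct FlagDesc { const char *Name, *Desc; };
static const FlagDesc Descriptions[] = {
#define FLAG_INT(Name, Default, Desc) {#Name, Desc},
#define FLAG_STR(Name, Desc) {#Name, Desc},
  MY_FLAGS(FLAG_INT, FLAG_STR)
#undef FLAG_INT
#undef FLAG_STR
};

int main() {
  Flags.verbosity = 1; // storage and table stay in sync automatically
  for (const auto &D : Descriptions)
    std::cout << D.Name << ": " << D.Desc << "\n";
}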
diff --git a/lib/Fuzzer/FuzzerIO.cpp b/lib/Fuzzer/FuzzerIO.cpp
index 224808c..ef23d42 100644
--- a/lib/Fuzzer/FuzzerIO.cpp
+++ b/lib/Fuzzer/FuzzerIO.cpp
@@ -33,6 +33,12 @@ Unit FileToVector(const std::string &Path) {
std::istreambuf_iterator<char>());
}
+std::string FileToString(const std::string &Path) {
+ std::ifstream T(Path);
+ return std::string((std::istreambuf_iterator<char>(T)),
+ std::istreambuf_iterator<char>());
+}
+
void CopyFileToErr(const std::string &Path) {
std::ifstream T(Path);
std::copy(std::istreambuf_iterator<char>(T), std::istreambuf_iterator<char>(),
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index e4e5eb7..7787109 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -23,7 +23,8 @@ namespace fuzzer {
typedef std::vector<uint8_t> Unit;
using namespace std::chrono;
-Unit ReadFile(const char *Path);
+std::string FileToString(const std::string &Path);
+Unit FileToVector(const std::string &Path);
void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V);
void WriteToFile(const Unit &U, const std::string &Path);
void CopyFileToErr(const std::string &Path);
@@ -51,17 +52,17 @@ class Fuzzer {
bool UseCounters = false;
bool UseFullCoverageSet = false;
bool UseCoveragePairs = false;
+ bool UseDFSan = false;
int PreferSmallDuringInitialShuffle = -1;
size_t MaxNumberOfRuns = ULONG_MAX;
std::string OutputCorpus;
+ std::vector<std::string> Tokens;
};
- Fuzzer(UserCallback Callback, FuzzingOptions Options)
- : Callback(Callback), Options(Options) {
- SetDeathCallback();
- }
+ Fuzzer(UserCallback Callback, FuzzingOptions Options);
void AddToCorpus(const Unit &U) { Corpus.push_back(U); }
size_t Loop(size_t NumIterations);
void ShuffleAndMinimize();
+ void InitializeDFSan();
size_t CorpusSize() const { return Corpus.size(); }
void ReadDir(const std::string &Path) {
ReadDirToVectorOfUnits(Path.c_str(), &Corpus);
@@ -76,20 +77,28 @@ class Fuzzer {
size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; }
- static void AlarmCallback();
+ static void StaticAlarmCallback();
+
+ Unit SubstituteTokens(const Unit &U) const;
private:
+ void AlarmCallback();
+ void ExecuteCallback(const Unit &U);
size_t MutateAndTestOne(Unit *U);
size_t RunOne(const Unit &U);
size_t RunOneMaximizeTotalCoverage(const Unit &U);
size_t RunOneMaximizeFullCoverageSet(const Unit &U);
size_t RunOneMaximizeCoveragePairs(const Unit &U);
void WriteToOutputCorpus(const Unit &U);
- static void WriteToCrash(const Unit &U, const char *Prefix);
+ void WriteToCrash(const Unit &U, const char *Prefix);
+ bool MutateWithDFSan(Unit *U);
+ void PrintStats(const char *Where, size_t Cov, const char *End = "\n");
+ void PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter = "");
void SetDeathCallback();
- static void DeathCallback();
- static Unit CurrentUnit;
+ static void StaticDeathCallback();
+ void DeathCallback();
+ Unit CurrentUnit;
size_t TotalNumberOfRuns = 0;
@@ -108,7 +117,8 @@ class Fuzzer {
UserCallback Callback;
FuzzingOptions Options;
system_clock::time_point ProcessStartTime = system_clock::now();
- static system_clock::time_point UnitStartTime;
+ system_clock::time_point UnitStartTime;
+ long TimeOfLongestUnitInSeconds = 0;
};
}; // namespace fuzzer
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 563fbf4..9dfe30b 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -16,21 +16,49 @@
namespace fuzzer {
-// static
-Unit Fuzzer::CurrentUnit;
-system_clock::time_point Fuzzer::UnitStartTime;
+// Only one Fuzzer per process.
+static Fuzzer *F;
+
+Fuzzer::Fuzzer(UserCallback Callback, FuzzingOptions Options)
+ : Callback(Callback), Options(Options) {
+ SetDeathCallback();
+ InitializeDFSan();
+ assert(!F);
+ F = this;
+}
void Fuzzer::SetDeathCallback() {
- __sanitizer_set_death_callback(DeathCallback);
+ __sanitizer_set_death_callback(StaticDeathCallback);
+}
+
+void Fuzzer::PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter) {
+ if (Options.Tokens.empty()) {
+ PrintASCII(U, PrintAfter);
+ } else {
+ auto T = SubstituteTokens(U);
+ T.push_back(0);
+ std::cerr << T.data();
+ std::cerr << PrintAfter;
+ }
+}
+
+void Fuzzer::StaticDeathCallback() {
+ assert(F);
+ F->DeathCallback();
}
void Fuzzer::DeathCallback() {
std::cerr << "DEATH: " << std::endl;
Print(CurrentUnit, "\n");
- PrintASCII(CurrentUnit, "\n");
+ PrintUnitInASCIIOrTokens(CurrentUnit, "\n");
WriteToCrash(CurrentUnit, "crash-");
}
+void Fuzzer::StaticAlarmCallback() {
+ assert(F);
+ F->AlarmCallback();
+}
+
void Fuzzer::AlarmCallback() {
size_t Seconds =
duration_cast<seconds>(system_clock::now() - UnitStartTime).count();
@@ -38,27 +66,40 @@ void Fuzzer::AlarmCallback() {
<< std::endl;
if (Seconds >= 3) {
Print(CurrentUnit, "\n");
- PrintASCII(CurrentUnit, "\n");
+ PrintUnitInASCIIOrTokens(CurrentUnit, "\n");
WriteToCrash(CurrentUnit, "timeout-");
}
exit(1);
}
+void Fuzzer::PrintStats(const char *Where, size_t Cov, const char *End) {
+ if (!Options.Verbosity) return;
+ size_t Seconds = secondsSinceProcessStartUp();
+ size_t ExecPerSec = (Seconds ? TotalNumberOfRuns / Seconds : 0);
+ std::cerr
+ << "#" << TotalNumberOfRuns
+ << "\t" << Where
+ << " cov " << Cov
+ << " bits " << TotalBits()
+ << " units " << Corpus.size()
+ << " exec/s " << ExecPerSec
+ << End;
+}
+
void Fuzzer::ShuffleAndMinimize() {
+ size_t MaxCov = 0;
bool PreferSmall =
(Options.PreferSmallDuringInitialShuffle == 1 ||
(Options.PreferSmallDuringInitialShuffle == -1 && rand() % 2));
if (Options.Verbosity)
- std::cerr << "Shuffle: Size: " << Corpus.size()
- << " prefer small: " << PreferSmall
- << "\n";
+ std::cerr << "PreferSmall: " << PreferSmall << "\n";
+ PrintStats("READ ", 0);
std::vector<Unit> NewCorpus;
std::random_shuffle(Corpus.begin(), Corpus.end());
if (PreferSmall)
std::stable_sort(
Corpus.begin(), Corpus.end(),
[](const Unit &A, const Unit &B) { return A.size() < B.size(); });
- size_t MaxCov = 0;
Unit &U = CurrentUnit;
for (const auto &C : Corpus) {
for (size_t First = 0; First < 1; First++) {
@@ -77,18 +118,29 @@ void Fuzzer::ShuffleAndMinimize() {
}
}
Corpus = NewCorpus;
- if (Options.Verbosity)
- std::cerr << "Shuffle done: " << Corpus.size() << " IC: " << MaxCov << "\n";
+ PrintStats("INITED", MaxCov);
}
size_t Fuzzer::RunOne(const Unit &U) {
UnitStartTime = system_clock::now();
TotalNumberOfRuns++;
+ size_t Res = 0;
if (Options.UseFullCoverageSet)
- return RunOneMaximizeFullCoverageSet(U);
- if (Options.UseCoveragePairs)
- return RunOneMaximizeCoveragePairs(U);
- return RunOneMaximizeTotalCoverage(U);
+ Res = RunOneMaximizeFullCoverageSet(U);
+ else if (Options.UseCoveragePairs)
+ Res = RunOneMaximizeCoveragePairs(U);
+ else
+ Res = RunOneMaximizeTotalCoverage(U);
+ auto UnitStopTime = system_clock::now();
+ auto TimeOfUnit =
+ duration_cast<seconds>(UnitStopTime - UnitStartTime).count();
+ if (TimeOfUnit > TimeOfLongestUnitInSeconds) {
+ TimeOfLongestUnitInSeconds = TimeOfUnit;
+ std::cerr << "Longest unit: " << TimeOfLongestUnitInSeconds
+ << " s:\n";
+ Print(U, "\n");
+ }
+ return Res;
}
static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) {
@@ -99,12 +151,35 @@ static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) {
return Res;
}
+Unit Fuzzer::SubstituteTokens(const Unit &U) const {
+ Unit Res;
+ for (auto Idx : U) {
+ if (Idx < Options.Tokens.size()) {
+ std::string Token = Options.Tokens[Idx];
+ Res.insert(Res.end(), Token.begin(), Token.end());
+ } else {
+ Res.push_back(' ');
+ }
+ }
+ // FIXME: Apply DFSan labels.
+ return Res;
+}
+
+void Fuzzer::ExecuteCallback(const Unit &U) {
+ if (Options.Tokens.empty()) {
+ Callback(U.data(), U.size());
+ } else {
+ auto T = SubstituteTokens(U);
+ Callback(T.data(), T.size());
+ }
+}
+
// Experimental. Does not yet scale.
// Fully reset the current coverage state, run a single unit,
// collect all coverage pairs and return non-zero if a new pair is observed.
size_t Fuzzer::RunOneMaximizeCoveragePairs(const Unit &U) {
__sanitizer_reset_coverage();
- Callback(U.data(), U.size());
+ ExecuteCallback(U);
uintptr_t *PCs;
uintptr_t NumPCs = __sanitizer_get_coverage_guards(&PCs);
bool HasNewPairs = false;
@@ -129,7 +204,7 @@ size_t Fuzzer::RunOneMaximizeCoveragePairs(const Unit &U) {
// e.g. test/FullCoverageSetTest.cpp. FIXME: make it scale.
size_t Fuzzer::RunOneMaximizeFullCoverageSet(const Unit &U) {
__sanitizer_reset_coverage();
- Callback(U.data(), U.size());
+ ExecuteCallback(U);
uintptr_t *PCs;
uintptr_t NumPCs =__sanitizer_get_coverage_guards(&PCs);
if (FullCoverageSets.insert(HashOfArrayOfPCs(PCs, NumPCs)).second)
@@ -144,21 +219,16 @@ size_t Fuzzer::RunOneMaximizeTotalCoverage(const Unit &U) {
__sanitizer_update_counter_bitset_and_clear_counters(0);
}
size_t OldCoverage = __sanitizer_get_total_unique_coverage();
- Callback(U.data(), U.size());
+ ExecuteCallback(U);
size_t NewCoverage = __sanitizer_get_total_unique_coverage();
size_t NumNewBits = 0;
if (Options.UseCounters)
NumNewBits = __sanitizer_update_counter_bitset_and_clear_counters(
CounterBitmap.data());
- if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity) {
- size_t Seconds = secondsSinceProcessStartUp();
- std::cerr
- << "#" << TotalNumberOfRuns
- << "\tcov: " << NewCoverage
- << "\tbits: " << TotalBits()
- << "\texec/s: " << (Seconds ? TotalNumberOfRuns / Seconds : 0) << "\n";
- }
+ if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity)
+ PrintStats("pulse ", NewCoverage);
+
if (NewCoverage > OldCoverage || NumNewBits)
return NewCoverage;
return 0;
@@ -192,22 +262,18 @@ size_t Fuzzer::MutateAndTestOne(Unit *U) {
for (int i = 0; i < Options.MutateDepth; i++) {
if (TotalNumberOfRuns >= Options.MaxNumberOfRuns)
return NewUnits;
+ MutateWithDFSan(U);
Mutate(U, Options.MaxLen);
size_t NewCoverage = RunOne(*U);
if (NewCoverage) {
Corpus.push_back(*U);
NewUnits++;
+ PrintStats("NEW ", NewCoverage, "");
if (Options.Verbosity) {
- std::cerr << "#" << TotalNumberOfRuns
- << "\tNEW: " << NewCoverage
- << " B: " << TotalBits()
- << " L: " << U->size()
- << " S: " << Corpus.size()
- << " I: " << i
- << "\t";
+ std::cerr << " L: " << U->size();
if (U->size() < 30) {
- PrintASCII(*U);
- std::cerr << "\t";
+ std::cerr << " ";
+ PrintUnitInASCIIOrTokens(*U, "\t");
Print(*U);
}
std::cerr << "\n";
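Fuzzer::SubstituteTokens() above treats every byte of the unit as an index into the Tokens table and emits a space for indices past the end, which is how a raw byte string becomes "C++-like" text when -tokens= is given. The sketch below reproduces that mapping in isolation; the token table and unit contents are invented for the example.

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Standalone equivalent of the byte-index-to-token substitution.
static std::string Substitute(const std::vector<uint8_t> &U,
                              const std::vector<std::string> &Tokens) {
  std::string Res;
  for (uint8_t Idx : U) {
    if (Idx < Tokens.size())
      Res += Tokens[Idx];
    else
      Res += ' '; // out-of-range bytes become a single space
  }
  return Res;
}

int main() {
  std::vector<std::string> Tokens = {"thread_local", "unsigned", "A", ";", " "};
  std::vector<uint8_t> Unit = {0, 4, 1, 4, 2, 3}; // indices into Tokens
  std::cout << Substitute(Unit, Tokens) << "\n";  // thread_local unsigned A;
}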
diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp
index 679f289..3635f39 100644
--- a/lib/Fuzzer/FuzzerUtil.cpp
+++ b/lib/Fuzzer/FuzzerUtil.cpp
@@ -19,15 +19,18 @@
namespace fuzzer {
void Print(const Unit &v, const char *PrintAfter) {
- std::cerr << v.size() << ": ";
for (auto x : v)
- std::cerr << (unsigned) x << " ";
+ std::cerr << "0x" << std::hex << (unsigned) x << std::dec << ",";
std::cerr << PrintAfter;
}
void PrintASCII(const Unit &U, const char *PrintAfter) {
- for (auto X : U)
- std::cerr << (char)((isascii(X) && X >= ' ') ? X : '?');
+ for (auto X : U) {
+ if (isprint(X))
+ std::cerr << X;
+ else
+ std::cerr << "\\x" << std::hex << (int)(unsigned)X << std::dec;
+ }
std::cerr << PrintAfter;
}
@@ -43,7 +46,7 @@ std::string Hash(const Unit &in) {
}
static void AlarmHandler(int, siginfo_t *, void *) {
- Fuzzer::AlarmCallback();
+ Fuzzer::StaticAlarmCallback();
}
void SetTimer(int Seconds) {
diff --git a/lib/Fuzzer/README.txt b/lib/Fuzzer/README.txt
index e4d6b4f..79f49b5 100644
--- a/lib/Fuzzer/README.txt
+++ b/lib/Fuzzer/README.txt
@@ -1,112 +1,2 @@
-===============================
-Fuzzer -- a library for coverage-guided fuzz testing.
-===============================
+Move to http://llvm.org/docs/LibFuzzer.html
-This library is intended primarily for in-process coverage-guided fuzz testing
-(fuzzing) of other libraries. The typical workflow looks like this:
-
- * Build the Fuzzer library as a static archive (or just a set of .o files).
- Note that the Fuzzer contains the main() function.
- Preferably do *not* use sanitizers while building the Fuzzer.
- * Build the library you are going to test with -fsanitize-coverage=[234]
- and one of the sanitizers. We recommend to build the library in several
- different modes (e.g. asan, msan, lsan, ubsan, etc) and even using different
- optimizations options (e.g. -O0, -O1, -O2) to diversify testing.
- * Build a test driver using the same options as the library.
- The test driver is a C/C++ file containing interesting calls to the library
- inside a single function:
- extern "C" void TestOneInput(const uint8_t *Data, size_t Size);
- * Link the Fuzzer, the library and the driver together into an executable
- using the same sanitizer options as for the library.
- * Collect the initial corpus of inputs for the
- fuzzer (a directory with test inputs, one file per input).
- The better your inputs are the faster you will find something interesting.
- Also try to keep your inputs small, otherwise the Fuzzer will run too slow.
- * Run the fuzzer with the test corpus. As new interesting test cases are
- discovered they will be added to the corpus. If a bug is discovered by
- the sanitizer (asan, etc) it will be reported as usual and the reproducer
- will be written to disk.
- Each Fuzzer process is single-threaded (unless the library starts its own
- threads). You can run the Fuzzer on the same corpus in multiple processes.
- in parallel. For run-time options run the Fuzzer binary with '-help=1'.
-
-
-The Fuzzer is similar in concept to AFL (http://lcamtuf.coredump.cx/afl/),
-but uses in-process Fuzzing, which is more fragile, more restrictive, but
-potentially much faster as it has no overhead for process start-up.
-It uses LLVM's "Sanitizer Coverage" instrumentation to get in-process
-coverage-feedback https://code.google.com/p/address-sanitizer/wiki/AsanCoverage
-
-The code resides in the LLVM repository and is (or will be) used by various
-parts of LLVM, but the Fuzzer itself does not (and should not) depend on any
-part of LLVM and can be used for other projects. Ideally, the Fuzzer's code
-should not have any external dependencies. Right now it uses STL, which may need
-to be fixed later. See also F.A.Q. below.
-
-Examples of usage in LLVM:
- * clang-format-fuzzer. The inputs are random pieces of C++-like text.
- * Build (make sure to use fresh clang as the host compiler):
- cmake -GNinja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
- -DLLVM_USE_SANITIZER=Address -DLLVM_USE_SANITIZE_COVERAGE=YES \
- /path/to/llvm -DCMAKE_BUILD_TYPE=Release
- ninja clang-format-fuzzer
- * Optionally build other kinds of binaries (asan+Debug, msan, ubsan, etc)
- * TODO: commit the pre-fuzzed corpus to svn (?).
- * Run:
- clang-format-fuzzer CORPUS_DIR
-
-Toy example (see SimpleTest.cpp):
-a simple function that does something interesting if it receives bytes "Hi!".
- # Build the Fuzzer with asan:
- % clang++ -std=c++11 -fsanitize=address -fsanitize-coverage=3 -O1 -g \
- Fuzzer*.cpp test/SimpleTest.cpp
- # Run the fuzzer with no corpus (assuming on empty input)
- % ./a.out
-
-===============================================================================
-F.A.Q.
-
-Q. Why Fuzzer does not use any of the LLVM support?
-A. There are two reasons.
-First, we want this library to be used outside of the LLVM w/o users having to
-build the rest of LLVM. This may sound unconvincing for many LLVM folks,
-but in practice the need for building the whole LLVM frightens many potential
-users -- and we want more users to use this code.
-Second, there is a subtle technical reason not to rely on the rest of LLVM, or
-any other large body of code (maybe not even STL). When coverage instrumentation
-is enabled, it will also instrument the LLVM support code which will blow up the
-coverage set of the process (since the fuzzer is in-process). In other words, by
-using more external dependencies we will slow down the fuzzer while the main
-reason for it to exist is extreme speed.
-
-Q. What about Windows then? The Fuzzer contains code that does not build on
-Windows.
-A. The sanitizer coverage support does not work on Windows either as of 01/2015.
-Once it's there, we'll need to re-implement OS-specific parts (I/O, signals).
-
-Q. When this Fuzzer is not a good solution for a problem?
-A.
- * If the test inputs are validated by the target library and the validator
- asserts/crashes on invalid inputs, the in-process fuzzer is not applicable
- (we could use fork() w/o exec, but it comes with extra overhead).
- * Bugs in the target library may accumulate w/o being detected. E.g. a memory
- corruption that goes undetected at first and then leads to a crash while
- testing another input. This is why it is highly recommended to run this
- in-process fuzzer with all sanitizers to detect most bugs on the spot.
- * It is harder to protect the in-process fuzzer from excessive memory
- consumption and infinite loops in the target library (still possible).
- * The target library should not have significant global state that is not
- reset between the runs.
- * Many interesting target libs are not designed in a way that supports
- the in-process fuzzer interface (e.g. require a file path instead of a
- byte array).
- * If a single test run takes a considerable fraction of a second (or
- more) the speed benefit from the in-process fuzzer is negligible.
- * If the target library runs persistent threads (that outlive
- execution of one test) the fuzzing results will be unreliable.
-
-Q. So, what exactly this Fuzzer is good for?
-A. This Fuzzer might be a good choice for testing libraries that have relatively
-small inputs, each input takes < 1ms to run, and the library code is not expected
-to crash on invalid inputs.
-Examples: regular expression matchers, text or binary format parsers.
diff --git a/lib/Fuzzer/cxx_fuzzer_tokens.txt b/lib/Fuzzer/cxx_fuzzer_tokens.txt
new file mode 100644
index 0000000..f3c4f80
--- /dev/null
+++ b/lib/Fuzzer/cxx_fuzzer_tokens.txt
@@ -0,0 +1,218 @@
+#
+##
+`
+~
+!
+@
+$
+%
+^
+&
+*
+(
+)
+_
+-
+_
+=
++
+{
+}
+[
+]
+|
+\
+,
+.
+/
+?
+>
+<
+;
+:
+'
+"
+++
+--
+<<
+>>
++=
+-=
+*=
+/=
+>>=
+<<=
+&=
+|=
+^=
+%=
+!=
+&&
+||
+==
+>=
+<=
+->
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+alignas
+alignof
+and
+and_eq
+asm
+auto
+bitand
+bitor
+bool
+break
+case
+catch
+char
+char16_t
+char32_t
+class
+compl
+concept
+const
+constexpr
+const_cast
+continue
+decltype
+default
+delete
+do
+double
+dynamic_cast
+else
+enum
+explicit
+export
+extern
+false
+float
+for
+friend
+goto
+if
+inline
+int
+long
+mutable
+namespace
+new
+noexcept
+not
+not_eq
+nullptr
+operator
+or
+or_eq
+private
+protected
+public
+register
+reinterpret_cast
+requires
+return
+short
+signed
+sizeof
+static
+static_assert
+static_cast
+struct
+switch
+template
+this
+thread_local
+throw
+true
+try
+typedef
+typeid
+typename
+union
+unsigned
+using
+virtual
+void
+volatile
+wchar_t
+while
+xor
+xor_eq
+if
+elif
+else
+endif
+defined
+ifdef
+ifndef
+define
+undef
+include
+line
+error
+pragma
+override
+final
diff --git a/lib/Fuzzer/dfsan_fuzzer_abi.list b/lib/Fuzzer/dfsan_fuzzer_abi.list
new file mode 100644
index 0000000..7da7522
--- /dev/null
+++ b/lib/Fuzzer/dfsan_fuzzer_abi.list
@@ -0,0 +1,12 @@
+# Replaces __sanitizer_cov_trace_cmp with __dfsw___sanitizer_cov_trace_cmp
+fun:__sanitizer_cov_trace_cmp=custom
+fun:__sanitizer_cov_trace_cmp=uninstrumented
+
+# Ignores coverage callbacks.
+fun:__sanitizer_cov=uninstrumented
+fun:__sanitizer_cov=discard
+fun:__sanitizer_cov_module_init=uninstrumented
+fun:__sanitizer_cov_module_init=discard
+
+# Don't add extra parameters to the Fuzzer callback.
+fun:TestOneInput=uninstrumented
diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt
index 08130c6..fb3bf20 100644
--- a/lib/Fuzzer/test/CMakeLists.txt
+++ b/lib/Fuzzer/test/CMakeLists.txt
@@ -2,10 +2,11 @@
# basic blocks and we'll fail to discover the targets.
# Also enable the coverage instrumentation back (it is disabled
# for the Fuzzer lib)
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O0 -fsanitize-coverage=4")
+set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O0 -fsanitize-coverage=4")
set(Tests
CounterTest
+ CxxTokensTest
FourIndependentBranchesTest
FullCoverageSetTest
InfiniteTest
@@ -14,11 +15,14 @@ set(Tests
TimeoutTest
)
+set(DFSanTests
+ DFSanSimpleCmpTest
+ )
+
set(TestBinaries)
foreach(Test ${Tests})
add_executable(LLVMFuzzer-${Test}
- EXCLUDE_FROM_ALL
${Test}.cpp
)
target_link_libraries(LLVMFuzzer-${Test}
@@ -52,6 +56,13 @@ target_link_libraries(LLVMFuzzer-Unittest
set(TestBinaries ${TestBinaries} LLVMFuzzer-Unittest)
+add_subdirectory(dfsan)
+
+foreach(Test ${DFSanTests})
+ set(TestBinaries ${TestBinaries} LLVMFuzzer-${Test})
+endforeach()
+
+
set_target_properties(${TestBinaries}
PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
diff --git a/lib/Fuzzer/test/CxxTokensTest.cpp b/lib/Fuzzer/test/CxxTokensTest.cpp
new file mode 100644
index 0000000..1addccb
--- /dev/null
+++ b/lib/Fuzzer/test/CxxTokensTest.cpp
@@ -0,0 +1,24 @@
+// Simple test for a fuzzer. The fuzzer must find a sequence of C++ tokens.
+#include <cstdint>
+#include <cstdlib>
+#include <cstddef>
+#include <cstring>
+#include <iostream>
+
+static void Found() {
+ std::cout << "Found the target, exiting\n";
+ exit(1);
+}
+
+extern "C" void TestOneInput(const uint8_t *Data, size_t Size) {
+ // looking for "thread_local unsigned A;"
+ if (Size < 24) return;
+ if (0 == memcmp(&Data[0], "thread_local", 12))
+ if (Data[12] == ' ')
+ if (0 == memcmp(&Data[13], "unsigned", 8))
+ if (Data[21] == ' ')
+ if (Data[22] == 'A')
+ if (Data[23] == ';')
+ Found();
+}
+
diff --git a/lib/Fuzzer/test/dfsan/CMakeLists.txt b/lib/Fuzzer/test/dfsan/CMakeLists.txt
new file mode 100644
index 0000000..b5b874f
--- /dev/null
+++ b/lib/Fuzzer/test/dfsan/CMakeLists.txt
@@ -0,0 +1,17 @@
+# These tests depend on both coverage and dfsan instrumentation.
+
+set(DFSAN_FUZZER_ABI_LIST "${CMAKE_CURRENT_SOURCE_DIR}/../../dfsan_fuzzer_abi.list")
+
+set(CMAKE_CXX_FLAGS_RELEASE
+ "${LIBFUZZER_FLAGS_BASE} -O0 -fno-sanitize=all -fsanitize=dataflow -mllvm -sanitizer-coverage-experimental-trace-compares=1 -fsanitize-blacklist=${DFSAN_FUZZER_ABI_LIST}")
+
+foreach(Test ${DFSanTests})
+ set_source_files_properties(${Test}.cpp PROPERTIES OBJECT_DEPENDS ${DFSAN_FUZZER_ABI_LIST})
+ add_executable(LLVMFuzzer-${Test}
+ ${Test}.cpp
+ )
+ target_link_libraries(LLVMFuzzer-${Test}
+ LLVMFuzzer
+ )
+endforeach()
+
diff --git a/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp b/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp
new file mode 100644
index 0000000..1162092
--- /dev/null
+++ b/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp
@@ -0,0 +1,30 @@
+// Simple test for a fuzzer. The fuzzer must find several narrow ranges.
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+
+extern "C" void TestOneInput(const uint8_t *Data, size_t Size) {
+ if (Size < 14) return;
+ uint64_t x = 0;
+ int64_t y = 0;
+ int z = 0;
+ unsigned short a = 0;
+ memcpy(&x, Data, 8);
+ memcpy(&y, Data + Size - 8, 8);
+ memcpy(&z, Data + Size / 2, sizeof(z));
+ memcpy(&a, Data + Size / 2 + 4, sizeof(a));
+
+ if (x > 1234567890 &&
+ x < 1234567895 &&
+ y >= 987654321 &&
+ y <= 987654325 &&
+ z < -10000 &&
+ z >= -10005 &&
+ z != -10003 &&
+ a == 4242) {
+ fprintf(stderr, "Found the target: size %zd (%zd, %zd, %d, %d), exiting.\n",
+ Size, x, y, z, a);
+ exit(1);
+ }
+}
diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test
index 45691f5..2a0e95f 100644
--- a/lib/Fuzzer/test/fuzzer.test
+++ b/lib/Fuzzer/test/fuzzer.test
@@ -20,3 +20,9 @@ FourIndependentBranchesTest: BINGO
RUN: not ./LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=CounterTest
CounterTest: BINGO
+
+RUN: not ./LLVMFuzzer-DFSanSimpleCmpTest -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=DFSanSimpleCmpTest
+DFSanSimpleCmpTest: Found the target:
+
+RUN: not ./LLVMFuzzer-CxxTokensTest -seed=1 -timeout=15 -tokens=%S/../cxx_fuzzer_tokens.txt 2>&1 | FileCheck %s --check-prefix=CxxTokensTest
+CxxTokensTest: Found the target, exiting
diff --git a/lib/IR/Android.mk b/lib/IR/Android.mk
index 2ca02f7..32ebfae 100644
--- a/lib/IR/Android.mk
+++ b/lib/IR/Android.mk
@@ -43,7 +43,6 @@ vmcore_SRC_FILES := \
Type.cpp \
TypeFinder.cpp \
Use.cpp \
- UseListOrder.cpp \
User.cpp \
Value.cpp \
ValueSymbolTable.cpp \
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index ae0beba..48737b5 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -401,9 +401,7 @@ public:
/// NumberedTypes - The numbered types, along with their value.
DenseMap<StructType*, unsigned> NumberedTypes;
-
- TypePrinting() {}
- ~TypePrinting() {}
+ TypePrinting() = default;
void incorporateTypes(const Module &M);
@@ -1443,11 +1441,11 @@ void MDFieldPrinter::printDIFlags(StringRef Name, unsigned Flags) {
Out << FS << Name << ": ";
SmallVector<unsigned, 8> SplitFlags;
- unsigned Extra = DIDescriptor::splitFlags(Flags, SplitFlags);
+ unsigned Extra = DebugNode::splitFlags(Flags, SplitFlags);
FieldSeparator FlagsFS(" | ");
for (unsigned F : SplitFlags) {
- const char *StringF = DIDescriptor::getFlagString(F);
+ const char *StringF = DebugNode::getFlagString(F);
assert(StringF && "Expected valid flag");
Out << FlagsFS << StringF;
}
@@ -1505,7 +1503,7 @@ static void writeMDSubrange(raw_ostream &Out, const MDSubrange *N,
Out << "!MDSubrange(";
MDFieldPrinter Printer(Out);
Printer.printInt("count", N->getCount(), /* ShouldSkipZero */ false);
- Printer.printInt("lowerBound", N->getLo());
+ Printer.printInt("lowerBound", N->getLowerBound());
Out << ")";
}
@@ -1539,16 +1537,16 @@ static void writeMDDerivedType(raw_ostream &Out, const MDDerivedType *N,
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
Printer.printTag(N);
Printer.printString("name", N->getName());
- Printer.printMetadata("scope", N->getScope());
- Printer.printMetadata("file", N->getFile());
+ Printer.printMetadata("scope", N->getRawScope());
+ Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
- Printer.printMetadata("baseType", N->getBaseType(),
+ Printer.printMetadata("baseType", N->getRawBaseType(),
/* ShouldSkipNull */ false);
Printer.printInt("size", N->getSizeInBits());
Printer.printInt("align", N->getAlignInBits());
Printer.printInt("offset", N->getOffsetInBits());
Printer.printDIFlags("flags", N->getFlags());
- Printer.printMetadata("extraData", N->getExtraData());
+ Printer.printMetadata("extraData", N->getRawExtraData());
Out << ")";
}
@@ -1559,19 +1557,19 @@ static void writeMDCompositeType(raw_ostream &Out, const MDCompositeType *N,
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
Printer.printTag(N);
Printer.printString("name", N->getName());
- Printer.printMetadata("scope", N->getScope());
- Printer.printMetadata("file", N->getFile());
+ Printer.printMetadata("scope", N->getRawScope());
+ Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
- Printer.printMetadata("baseType", N->getBaseType());
+ Printer.printMetadata("baseType", N->getRawBaseType());
Printer.printInt("size", N->getSizeInBits());
Printer.printInt("align", N->getAlignInBits());
Printer.printInt("offset", N->getOffsetInBits());
Printer.printDIFlags("flags", N->getFlags());
- Printer.printMetadata("elements", N->getElements());
+ Printer.printMetadata("elements", N->getRawElements());
Printer.printDwarfEnum("runtimeLang", N->getRuntimeLang(),
dwarf::LanguageString);
- Printer.printMetadata("vtableHolder", N->getVTableHolder());
- Printer.printMetadata("templateParams", N->getTemplateParams());
+ Printer.printMetadata("vtableHolder", N->getRawVTableHolder());
+ Printer.printMetadata("templateParams", N->getRawTemplateParams());
Printer.printString("identifier", N->getIdentifier());
Out << ")";
}
@@ -1582,7 +1580,8 @@ static void writeMDSubroutineType(raw_ostream &Out, const MDSubroutineType *N,
Out << "!MDSubroutineType(";
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
Printer.printDIFlags("flags", N->getFlags());
- Printer.printMetadata("types", N->getTypeArray(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("types", N->getRawTypeArray(),
+ /* ShouldSkipNull */ false);
Out << ")";
}
@@ -1604,7 +1603,7 @@ static void writeMDCompileUnit(raw_ostream &Out, const MDCompileUnit *N,
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
Printer.printDwarfEnum("language", N->getSourceLanguage(),
dwarf::LanguageString, /* ShouldSkipZero */ false);
- Printer.printMetadata("file", N->getFile(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
Printer.printString("producer", N->getProducer());
Printer.printBool("isOptimized", N->isOptimized());
Printer.printString("flags", N->getFlags());
@@ -1613,11 +1612,11 @@ static void writeMDCompileUnit(raw_ostream &Out, const MDCompileUnit *N,
Printer.printString("splitDebugFilename", N->getSplitDebugFilename());
Printer.printInt("emissionKind", N->getEmissionKind(),
/* ShouldSkipZero */ false);
- Printer.printMetadata("enums", N->getEnumTypes());
- Printer.printMetadata("retainedTypes", N->getRetainedTypes());
- Printer.printMetadata("subprograms", N->getSubprograms());
- Printer.printMetadata("globals", N->getGlobalVariables());
- Printer.printMetadata("imports", N->getImportedEntities());
+ Printer.printMetadata("enums", N->getRawEnumTypes());
+ Printer.printMetadata("retainedTypes", N->getRawRetainedTypes());
+ Printer.printMetadata("subprograms", N->getRawSubprograms());
+ Printer.printMetadata("globals", N->getRawGlobalVariables());
+ Printer.printMetadata("imports", N->getRawImportedEntities());
Out << ")";
}
@@ -1628,23 +1627,23 @@ static void writeMDSubprogram(raw_ostream &Out, const MDSubprogram *N,
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
Printer.printString("name", N->getName());
Printer.printString("linkageName", N->getLinkageName());
- Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
- Printer.printMetadata("file", N->getFile());
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
- Printer.printMetadata("type", N->getType());
+ Printer.printMetadata("type", N->getRawType());
Printer.printBool("isLocal", N->isLocalToUnit());
Printer.printBool("isDefinition", N->isDefinition());
Printer.printInt("scopeLine", N->getScopeLine());
- Printer.printMetadata("containingType", N->getContainingType());
+ Printer.printMetadata("containingType", N->getRawContainingType());
Printer.printDwarfEnum("virtuality", N->getVirtuality(),
dwarf::VirtualityString);
Printer.printInt("virtualIndex", N->getVirtualIndex());
Printer.printDIFlags("flags", N->getFlags());
Printer.printBool("isOptimized", N->isOptimized());
- Printer.printMetadata("function", N->getFunction());
- Printer.printMetadata("templateParams", N->getTemplateParams());
- Printer.printMetadata("declaration", N->getDeclaration());
- Printer.printMetadata("variables", N->getVariables());
+ Printer.printMetadata("function", N->getRawFunction());
+ Printer.printMetadata("templateParams", N->getRawTemplateParams());
+ Printer.printMetadata("declaration", N->getRawDeclaration());
+ Printer.printMetadata("variables", N->getRawVariables());
Out << ")";
}
@@ -1653,8 +1652,8 @@ static void writeMDLexicalBlock(raw_ostream &Out, const MDLexicalBlock *N,
const Module *Context) {
Out << "!MDLexicalBlock(";
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
- Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
- Printer.printMetadata("file", N->getFile());
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
Printer.printInt("column", N->getColumn());
Out << ")";
@@ -1667,8 +1666,8 @@ static void writeMDLexicalBlockFile(raw_ostream &Out,
const Module *Context) {
Out << "!MDLexicalBlockFile(";
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
- Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
- Printer.printMetadata("file", N->getFile());
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile());
Printer.printInt("discriminator", N->getDiscriminator(),
/* ShouldSkipZero */ false);
Out << ")";
@@ -1680,8 +1679,8 @@ static void writeMDNamespace(raw_ostream &Out, const MDNamespace *N,
Out << "!MDNamespace(";
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
Printer.printString("name", N->getName());
- Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
- Printer.printMetadata("file", N->getFile());
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
Out << ")";
}
@@ -1694,7 +1693,7 @@ static void writeMDTemplateTypeParameter(raw_ostream &Out,
Out << "!MDTemplateTypeParameter(";
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
Printer.printString("name", N->getName());
- Printer.printMetadata("type", N->getType(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("type", N->getRawType(), /* ShouldSkipNull */ false);
Out << ")";
}
@@ -1708,7 +1707,7 @@ static void writeMDTemplateValueParameter(raw_ostream &Out,
if (N->getTag() != dwarf::DW_TAG_template_value_parameter)
Printer.printTag(N);
Printer.printString("name", N->getName());
- Printer.printMetadata("type", N->getType());
+ Printer.printMetadata("type", N->getRawType());
Printer.printMetadata("value", N->getValue(), /* ShouldSkipNull */ false);
Out << ")";
}
@@ -1720,14 +1719,14 @@ static void writeMDGlobalVariable(raw_ostream &Out, const MDGlobalVariable *N,
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
Printer.printString("name", N->getName());
Printer.printString("linkageName", N->getLinkageName());
- Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
- Printer.printMetadata("file", N->getFile());
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
- Printer.printMetadata("type", N->getType());
+ Printer.printMetadata("type", N->getRawType());
Printer.printBool("isLocal", N->isLocalToUnit());
Printer.printBool("isDefinition", N->isDefinition());
- Printer.printMetadata("variable", N->getVariable());
- Printer.printMetadata("declaration", N->getStaticDataMemberDeclaration());
+ Printer.printMetadata("variable", N->getRawVariable());
+ Printer.printMetadata("declaration", N->getRawStaticDataMemberDeclaration());
Out << ")";
}
@@ -1741,12 +1740,11 @@ static void writeMDLocalVariable(raw_ostream &Out, const MDLocalVariable *N,
Printer.printInt("arg", N->getArg(),
/* ShouldSkipZero */
N->getTag() == dwarf::DW_TAG_auto_variable);
- Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
- Printer.printMetadata("file", N->getFile());
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
- Printer.printMetadata("type", N->getType());
+ Printer.printMetadata("type", N->getRawType());
Printer.printDIFlags("flags", N->getFlags());
- Printer.printMetadata("inlinedAt", N->getInlinedAt());
Out << ")";
}
@@ -1777,12 +1775,12 @@ static void writeMDObjCProperty(raw_ostream &Out, const MDObjCProperty *N,
Out << "!MDObjCProperty(";
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
Printer.printString("name", N->getName());
- Printer.printMetadata("file", N->getFile());
+ Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
Printer.printString("setter", N->getSetterName());
Printer.printString("getter", N->getGetterName());
Printer.printInt("attributes", N->getAttributes());
- Printer.printMetadata("type", N->getType());
+ Printer.printMetadata("type", N->getRawType());
Out << ")";
}
@@ -1793,8 +1791,8 @@ static void writeMDImportedEntity(raw_ostream &Out, const MDImportedEntity *N,
MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
Printer.printTag(N);
Printer.printString("name", N->getName());
- Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
- Printer.printMetadata("entity", N->getEntity());
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("entity", N->getRawEntity());
Printer.printInt("line", N->getLine());
Out << ")";
}
@@ -1943,16 +1941,19 @@ class AssemblyWriter {
TypePrinting TypePrinter;
AssemblyAnnotationWriter *AnnotationWriter;
SetVector<const Comdat *> Comdats;
+ bool ShouldPreserveUseListOrder;
UseListOrderStack UseListOrders;
public:
/// Construct an AssemblyWriter with an external SlotTracker
- AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
- const Module *M, AssemblyAnnotationWriter *AAW);
+ AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const Module *M,
+ AssemblyAnnotationWriter *AAW,
+ bool ShouldPreserveUseListOrder = false);
/// Construct an AssemblyWriter with an internally allocated SlotTracker
AssemblyWriter(formatted_raw_ostream &o, const Module *M,
- AssemblyAnnotationWriter *AAW);
+ AssemblyAnnotationWriter *AAW,
+ bool ShouldPreserveUseListOrder = false);
void printMDNodeBody(const MDNode *MD);
void printNamedMDNode(const NamedMDNode *NMD);
@@ -2004,18 +2005,20 @@ void AssemblyWriter::init() {
Comdats.insert(C);
}
-
AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
- const Module *M,
- AssemblyAnnotationWriter *AAW)
- : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW) {
+ const Module *M, AssemblyAnnotationWriter *AAW,
+ bool ShouldPreserveUseListOrder)
+ : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW),
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
init();
}
AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M,
- AssemblyAnnotationWriter *AAW)
- : Out(o), TheModule(M), ModuleSlotTracker(createSlotTracker(M)),
- Machine(*ModuleSlotTracker), AnnotationWriter(AAW) {
+ AssemblyAnnotationWriter *AAW,
+ bool ShouldPreserveUseListOrder)
+ : Out(o), TheModule(M), ModuleSlotTracker(createSlotTracker(M)),
+ Machine(*ModuleSlotTracker), AnnotationWriter(AAW),
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
init();
}
@@ -2103,7 +2106,7 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
void AssemblyWriter::printModule(const Module *M) {
Machine.initialize();
- if (shouldPreserveAssemblyUseListOrder())
+ if (ShouldPreserveUseListOrder)
UseListOrders = predictUseListOrder(M);
if (!M->getModuleIdentifier().empty() &&
@@ -2778,8 +2781,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
}
Operand = CI->getCalledValue();
- PointerType *PTy = cast<PointerType>(Operand->getType());
- FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ FunctionType *FTy = cast<FunctionType>(CI->getFunctionType());
Type *RetTy = FTy->getReturnType();
const AttributeSet &PAL = CI->getAttributes();
@@ -2791,15 +2793,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
// and if the return type is not a pointer to a function.
//
Out << ' ';
- if (!FTy->isVarArg() &&
- (!RetTy->isPointerTy() ||
- !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
- TypePrinter.print(RetTy, Out);
- Out << ' ';
- writeOperand(Operand, false);
- } else {
- writeOperand(Operand, true);
- }
+ TypePrinter.print(FTy->isVarArg() ? FTy : RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
Out << '(';
for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
if (op > 0)
@@ -3060,10 +3056,18 @@ void AssemblyWriter::printUseLists(const Function *F) {
// External Interface declarations
//===----------------------------------------------------------------------===//
-void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(this->getParent());
+ formatted_raw_ostream OS(ROS);
+ AssemblyWriter W(OS, SlotTable, this->getParent(), AAW);
+ W.printFunction(this);
+}
+
+void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
+ bool ShouldPreserveUseListOrder) const {
SlotTracker SlotTable(this);
formatted_raw_ostream OS(ROS);
- AssemblyWriter W(OS, SlotTable, this, AAW);
+ AssemblyWriter W(OS, SlotTable, this, AAW, ShouldPreserveUseListOrder);
W.printModule(this);
}
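The AsmWriter changes above thread a ShouldPreserveUseListOrder flag through AssemblyWriter and expose it as a new parameter on Module::print (alongside the new Function::print). A sketch of a caller opting in, not part of the patch:

// Sketch only: opting into use-list-order preservation via the new parameter.
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

void printPreservingUseListOrder(const llvm::Module &M) {
  // Passing true asks the writer to predict and emit use-list orders
  // (uselistorder directives); false matches the previous behaviour.
  M.print(llvm::outs(), /*AAW=*/nullptr, /*ShouldPreserveUseListOrder=*/true);
}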
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index 199c318..d544689 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -115,10 +115,10 @@ class IntAttributeImpl : public EnumAttributeImpl {
public:
IntAttributeImpl(Attribute::AttrKind Kind, uint64_t Val)
: EnumAttributeImpl(IntAttrEntry, Kind), Val(Val) {
- assert(
- (Kind == Attribute::Alignment || Kind == Attribute::StackAlignment ||
- Kind == Attribute::Dereferenceable) &&
- "Wrong kind for int attribute!");
+ assert((Kind == Attribute::Alignment || Kind == Attribute::StackAlignment ||
+ Kind == Attribute::Dereferenceable ||
+ Kind == Attribute::DereferenceableOrNull) &&
+ "Wrong kind for int attribute!");
}
uint64_t getValue() const { return Val; }
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index daac6b5..be5b74f 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -94,6 +94,12 @@ Attribute Attribute::getWithDereferenceableBytes(LLVMContext &Context,
return get(Context, Dereferenceable, Bytes);
}
+Attribute Attribute::getWithDereferenceableOrNullBytes(LLVMContext &Context,
+ uint64_t Bytes) {
+ assert(Bytes && "Bytes must be non-zero.");
+ return get(Context, DereferenceableOrNull, Bytes);
+}
+
//===----------------------------------------------------------------------===//
// Attribute Accessor Methods
//===----------------------------------------------------------------------===//
@@ -170,6 +176,13 @@ uint64_t Attribute::getDereferenceableBytes() const {
return pImpl->getValueAsInt();
}
+uint64_t Attribute::getDereferenceableOrNullBytes() const {
+ assert(hasAttribute(Attribute::DereferenceableOrNull) &&
+ "Trying to get dereferenceable bytes from "
+ "non-dereferenceable attribute!");
+ return pImpl->getValueAsInt();
+}
+
std::string Attribute::getAsString(bool InAttrGrp) const {
if (!pImpl) return "";
@@ -263,9 +276,9 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return Result;
}
- if (hasAttribute(Attribute::StackAlignment)) {
+ auto AttrWithBytesToString = [&](const char *Name) {
std::string Result;
- Result += "alignstack";
+ Result += Name;
if (InAttrGrp) {
Result += "=";
Result += utostr(getValueAsInt());
@@ -275,21 +288,16 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
Result += ")";
}
return Result;
- }
+ };
- if (hasAttribute(Attribute::Dereferenceable)) {
- std::string Result;
- Result += "dereferenceable";
- if (InAttrGrp) {
- Result += "=";
- Result += utostr(getValueAsInt());
- } else {
- Result += "(";
- Result += utostr(getValueAsInt());
- Result += ")";
- }
- return Result;
- }
+ if (hasAttribute(Attribute::StackAlignment))
+ return AttrWithBytesToString("alignstack");
+
+ if (hasAttribute(Attribute::Dereferenceable))
+ return AttrWithBytesToString("dereferenceable");
+
+ if (hasAttribute(Attribute::DereferenceableOrNull))
+ return AttrWithBytesToString("dereferenceable_or_null");
// Convert target-dependent attributes to strings of the form:
//
@@ -298,12 +306,12 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
//
if (isStringAttribute()) {
std::string Result;
- Result += '\"' + getKindAsString().str() + '"';
+ Result += (Twine('"') + getKindAsString() + Twine('"')).str();
StringRef Val = pImpl->getValueAsString();
if (Val.empty()) return Result;
- Result += "=\"" + Val.str() + '"';
+ Result += ("=\"" + Val + Twine('"')).str();
return Result;
}
@@ -428,6 +436,11 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
case Attribute::JumpTable: return 1ULL << 45;
case Attribute::Dereferenceable:
llvm_unreachable("dereferenceable attribute not supported in raw format");
+ break;
+ case Attribute::DereferenceableOrNull:
+ llvm_unreachable("dereferenceable_or_null attribute not supported in raw "
+ "format");
+ break;
}
llvm_unreachable("Unsupported attribute type");
}
@@ -663,6 +676,10 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
Attrs.push_back(std::make_pair(Index,
Attribute::getWithDereferenceableBytes(C,
B.getDereferenceableBytes())));
+ else if (Kind == Attribute::DereferenceableOrNull)
+ Attrs.push_back(
+ std::make_pair(Index, Attribute::getWithDereferenceableOrNullBytes(
+ C, B.getDereferenceableOrNullBytes())));
else
Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind)));
}
@@ -842,6 +859,14 @@ AttributeSet AttributeSet::addDereferenceableAttr(LLVMContext &C, unsigned Index
return addAttributes(C, Index, AttributeSet::get(C, Index, B));
}
+AttributeSet AttributeSet::addDereferenceableOrNullAttr(LLVMContext &C,
+ unsigned Index,
+ uint64_t Bytes) const {
+ llvm::AttrBuilder B;
+ B.addDereferenceableOrNullAttr(Bytes);
+ return addAttributes(C, Index, AttributeSet::get(C, Index, B));
+}
+
//===----------------------------------------------------------------------===//
// AttributeSet Accessor Methods
//===----------------------------------------------------------------------===//
@@ -1011,7 +1036,8 @@ void AttributeSet::dump() const {
//===----------------------------------------------------------------------===//
AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
- : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0) {
+ : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0),
+ DerefOrNullBytes(0) {
AttributeSetImpl *pImpl = AS.pImpl;
if (!pImpl) return;
@@ -1028,7 +1054,7 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
void AttrBuilder::clear() {
Attrs.reset();
- Alignment = StackAlignment = DerefBytes = 0;
+ Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
}
AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
@@ -1055,6 +1081,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
StackAlignment = Attr.getStackAlignment();
else if (Kind == Attribute::Dereferenceable)
DerefBytes = Attr.getDereferenceableBytes();
+ else if (Kind == Attribute::DereferenceableOrNull)
+ DerefOrNullBytes = Attr.getDereferenceableOrNullBytes();
return *this;
}
@@ -1073,6 +1101,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
StackAlignment = 0;
else if (Val == Attribute::Dereferenceable)
DerefBytes = 0;
+ else if (Val == Attribute::DereferenceableOrNull)
+ DerefOrNullBytes = 0;
return *this;
}
@@ -1099,6 +1129,8 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
StackAlignment = 0;
else if (Kind == Attribute::Dereferenceable)
DerefBytes = 0;
+ else if (Kind == Attribute::DereferenceableOrNull)
+ DerefOrNullBytes = 0;
} else {
assert(Attr.isStringAttribute() && "Invalid attribute type!");
std::map<std::string, std::string>::iterator
@@ -1149,6 +1181,15 @@ AttrBuilder &AttrBuilder::addDereferenceableAttr(uint64_t Bytes) {
return *this;
}
+AttrBuilder &AttrBuilder::addDereferenceableOrNullAttr(uint64_t Bytes) {
+ if (Bytes == 0)
+ return *this;
+
+ Attrs[Attribute::DereferenceableOrNull] = true;
+ DerefOrNullBytes = Bytes;
+ return *this;
+}
+
AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
// FIXME: What if both have alignments, but they don't match?!
if (!Alignment)
@@ -1225,7 +1266,8 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
I = Attribute::AttrKind(I + 1)) {
- if (I == Attribute::Dereferenceable)
+ if (I == Attribute::Dereferenceable ||
+ I == Attribute::DereferenceableOrNull)
continue;
if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) {
Attrs[I] = true;
@@ -1261,6 +1303,7 @@ AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) {
.addAttribute(Attribute::NoCapture)
.addAttribute(Attribute::NonNull)
.addDereferenceableAttr(1) // the int here is ignored
+ .addDereferenceableOrNullAttr(1) // the int here is ignored
.addAttribute(Attribute::ReadNone)
.addAttribute(Attribute::ReadOnly)
.addAttribute(Attribute::StructRet)
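Together these hunks introduce the dereferenceable_or_null attribute end to end: a constructor (getWithDereferenceableOrNullBytes), an accessor, printing, and AttrBuilder/AttributeSet plumbing. A sketch of attaching it through the new AttributeSet helper, not part of the patch:

// Sketch only: attach dereferenceable_or_null(Bytes) to one attribute index
// using the AttributeSet::addDereferenceableOrNullAttr() added above.
#include <cstdint>
#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"

llvm::AttributeSet addDerefOrNull(llvm::LLVMContext &Ctx, llvm::AttributeSet AS,
                                  unsigned Index, uint64_t Bytes) {
  // Index follows AttributeSet's usual numbering (e.g. 1 for the first parameter).
  return AS.addDereferenceableOrNullAttr(Ctx, Index, Bytes);
}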
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index d2dfeaa..bb23d2c 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -124,19 +124,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
}
- case 'd': {
- if (Name.startswith("dbg.declare") && F->arg_size() == 2) {
- F->setName(Name + ".old");
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_declare);
- return true;
- }
- if (Name.startswith("dbg.value") && F->arg_size() == 3) {
- F->setName(Name + ".old");
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
- return true;
- }
- break;
- }
case 'o':
// We only need to change the name to match the mangling including the
@@ -354,23 +341,6 @@ bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
return false;
}
-static MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) {
- if (!DbgNode || Elt >= DbgNode->getNumOperands())
- return nullptr;
- return dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt));
-}
-
-static MetadataAsValue *getExpression(Value *VarOperand, Function *F) {
- // Old-style DIVariables have an optional expression as the 8th element.
- DIExpression Expr(getNodeField(
- cast<MDNode>(cast<MetadataAsValue>(VarOperand)->getMetadata()), 8));
- if (!Expr) {
- DIBuilder DIB(*F->getParent(), /*AllowUnresolved*/ false);
- Expr = DIB.createExpression();
- }
- return MetadataAsValue::get(F->getContext(), Expr);
-}
-
// Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
@@ -745,7 +715,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
return;
}
- std::string Name = CI->getName().str();
+ std::string Name = CI->getName();
if (!Name.empty())
CI->setName(Name + ".old");
@@ -753,25 +723,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
default:
llvm_unreachable("Unknown function for CallInst upgrade.");
- // Upgrade debug intrinsics to use an additional DIExpression argument.
- case Intrinsic::dbg_declare: {
- auto NewCI =
- Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1),
- getExpression(CI->getArgOperand(1), F), Name);
- NewCI->setDebugLoc(CI->getDebugLoc());
- CI->replaceAllUsesWith(NewCI);
- CI->eraseFromParent();
- return;
- }
- case Intrinsic::dbg_value: {
- auto NewCI = Builder.CreateCall4(
- NewFn, CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
- getExpression(CI->getArgOperand(2), F), Name);
- NewCI->setDebugLoc(CI->getDebugLoc());
- CI->replaceAllUsesWith(NewCI);
- CI->eraseFromParent();
- return;
- }
case Intrinsic::ctlz:
case Intrinsic::cttz:
assert(CI->getNumArgOperands() == 1 &&
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index fe38385..23ec705 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -94,8 +94,8 @@ void BasicBlock::removeFromParent() {
getParent()->getBasicBlockList().remove(this);
}
-void BasicBlock::eraseFromParent() {
- getParent()->getBasicBlockList().erase(this);
+iplist<BasicBlock>::iterator BasicBlock::eraseFromParent() {
+ return getParent()->getBasicBlockList().erase(this);
}
/// Unlink this basic block from its current function and
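With eraseFromParent() now returning the iterator to the following block, erase-while-iterating loops no longer need to save the next iterator by hand. A sketch, not part of the patch (IsDead is a placeholder predicate):

// Sketch only: erase basic blocks while walking the function, using the
// iterator returned by the new eraseFromParent().
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"

void pruneBlocks(llvm::Function &F, bool (*IsDead)(const llvm::BasicBlock &)) {
  for (auto It = F.begin(), End = F.end(); It != End;) {
    if (IsDead(*It))
      It = It->eraseFromParent();  // returns the iterator to the next block
    else
      ++It;
  }
}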
diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt
index 9fef0b2..d2e0c38 100644
--- a/lib/IR/CMakeLists.txt
+++ b/lib/IR/CMakeLists.txt
@@ -41,7 +41,6 @@ add_llvm_library(LLVMCore
Type.cpp
TypeFinder.cpp
Use.cpp
- UseListOrder.cpp
User.cpp
Value.cpp
ValueSymbolTable.cpp
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index d97d2c4..d3caf04 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -132,7 +132,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
if (ElTy == DPTy->getElementType())
// This GEP is inbounds because all indices are zero.
- return ConstantExpr::getInBoundsGetElementPtr(V, IdxList);
+ return ConstantExpr::getInBoundsGetElementPtr(PTy->getElementType(),
+ V, IdxList);
}
// Handle casts from one vector constant to another. We know that the src
@@ -169,7 +170,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
// be the same. Consequently, we just fold to V.
return V;
- if (DestTy->isFloatingPointTy())
+ // See note below regarding the PPC_FP128 restriction.
+ if (DestTy->isFloatingPointTy() && !DestTy->isPPC_FP128Ty())
return ConstantFP::get(DestTy->getContext(),
APFloat(DestTy->getFltSemantics(),
CI->getValue()));
@@ -179,9 +181,19 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
}
// Handle ConstantFP input: FP -> Integral.
- if (ConstantFP *FP = dyn_cast<ConstantFP>(V))
+ if (ConstantFP *FP = dyn_cast<ConstantFP>(V)) {
+ // PPC_FP128 is really the sum of two consecutive doubles, where the first
+ // double is always stored first in memory, regardless of the target
+ // endianness. The memory layout of i128, however, depends on the target
+ // endianness, and so we can't fold this without target endianness
+ // information. This should instead be handled by
+ // Analysis/ConstantFolding.cpp
+ if (FP->getType()->isPPC_FP128Ty())
+ return nullptr;
+
return ConstantInt::get(FP->getContext(),
FP->getValueAPF().bitcastToAPInt());
+ }
return nullptr;
}
@@ -2020,7 +2032,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
if (isa<UndefValue>(C)) {
PointerType *Ptr = cast<PointerType>(C->getType());
- Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
+ Type *Ty = GetElementPtrInst::getIndexedType(
+ cast<PointerType>(Ptr->getScalarType())->getElementType(), Idxs);
assert(Ty && "Invalid indices for GEP!");
return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace()));
}
@@ -2034,7 +2047,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
}
if (isNull) {
PointerType *Ptr = cast<PointerType>(C->getType());
- Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
+ Type *Ty = GetElementPtrInst::getIndexedType(
+ cast<PointerType>(Ptr->getScalarType())->getElementType(), Idxs);
assert(Ty && "Invalid indices for GEP!");
return ConstantPointerNull::get(PointerType::get(Ty,
Ptr->getAddressSpace()));
@@ -2107,10 +2121,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
NewIndices.push_back(Combined);
NewIndices.append(Idxs.begin() + 1, Idxs.end());
- return
- ConstantExpr::getGetElementPtr(CE->getOperand(0), NewIndices,
- inBounds &&
- cast<GEPOperator>(CE)->isInBounds());
+ return ConstantExpr::getGetElementPtr(
+ cast<GEPOperator>(CE)->getSourceElementType(), CE->getOperand(0),
+ NewIndices, inBounds && cast<GEPOperator>(CE)->isInBounds());
}
}
@@ -2135,8 +2148,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
if (SrcArrayTy && DstArrayTy
&& SrcArrayTy->getElementType() == DstArrayTy->getElementType()
&& SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace())
- return ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0),
- Idxs, inBounds);
+ return ConstantExpr::getGetElementPtr(
+ SrcArrayTy, (Constant *)CE->getOperand(0), Idxs, inBounds);
}
}
}
@@ -2202,7 +2215,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
if (!NewIdxs.empty()) {
for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]);
- return ConstantExpr::getGetElementPtr(C, NewIdxs, inBounds);
+ return ConstantExpr::getGetElementPtr(nullptr, C, NewIdxs, inBounds);
}
// If all indices are known integers and normalized, we can do a simple
@@ -2210,7 +2223,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
if (!Unknown && !inBounds)
if (auto *GV = dyn_cast<GlobalVariable>(C))
if (!GV->hasExternalWeakLinkage() && isInBoundsIndices(Idxs))
- return ConstantExpr::getInBoundsGetElementPtr(C, Idxs);
+ return ConstantExpr::getInBoundsGetElementPtr(nullptr, C, Idxs);
return nullptr;
}
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index e51a396..3f8d1f1 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -1252,7 +1252,7 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2],
OnlyIfReducedTy);
case Instruction::GetElementPtr:
- return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1),
+ return ConstantExpr::getGetElementPtr(nullptr, Ops[0], Ops.slice(1),
cast<GEPOperator>(this)->isInBounds(),
OnlyIfReducedTy);
case Instruction::ICmp:
@@ -1925,7 +1925,7 @@ Constant *ConstantExpr::getSizeOf(Type* Ty) {
// Note that a non-inbounds gep is used, as null isn't within any object.
Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
Constant *GEP = getGetElementPtr(
- Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
+ Ty, Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
return getPtrToInt(GEP,
Type::getInt64Ty(Ty->getContext()));
}
@@ -1939,7 +1939,7 @@ Constant *ConstantExpr::getAlignOf(Type* Ty) {
Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
Constant *Indices[2] = { Zero, One };
- Constant *GEP = getGetElementPtr(NullPtr, Indices);
+ Constant *GEP = getGetElementPtr(AligningTy, NullPtr, Indices);
return getPtrToInt(GEP,
Type::getInt64Ty(Ty->getContext()));
}
@@ -1957,7 +1957,7 @@ Constant *ConstantExpr::getOffsetOf(Type* Ty, Constant *FieldNo) {
FieldNo
};
Constant *GEP = getGetElementPtr(
- Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
+ Ty, Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
return getPtrToInt(GEP,
Type::getInt64Ty(Ty->getContext()));
}
@@ -2001,19 +2001,22 @@ Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2,
return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
}
-Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
- bool InBounds, Type *OnlyIfReducedTy) {
- assert(C->getType()->isPtrOrPtrVectorTy() &&
- "Non-pointer type for constant GetElementPtr expression");
-
+Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
+ ArrayRef<Value *> Idxs, bool InBounds,
+ Type *OnlyIfReducedTy) {
if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs))
return FC; // Fold a few common cases.
+ if (!Ty)
+ Ty = cast<PointerType>(C->getType()->getScalarType())->getElementType();
+ else
+ assert(Ty ==
+ cast<PointerType>(C->getType()->getScalarType())->getElementType());
// Get the result type of the getelementptr!
- Type *Ty = GetElementPtrInst::getIndexedType(C->getType(), Idxs);
- assert(Ty && "GEP indices invalid!");
+ Type *DestTy = GetElementPtrInst::getIndexedType(Ty, Idxs);
+ assert(DestTy && "GEP indices invalid!");
unsigned AS = C->getType()->getPointerAddressSpace();
- Type *ReqTy = Ty->getPointerTo(AS);
+ Type *ReqTy = DestTy->getPointerTo(AS);
if (VectorType *VecTy = dyn_cast<VectorType>(C->getType()))
ReqTy = VectorType::get(ReqTy, VecTy->getNumElements());
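The ConstantExpr GEP builders now take the source element type as an explicit first argument; passing nullptr keeps the old behaviour of deriving it from the pointer operand's pointee type, as the new assertion in getGetElementPtr shows. A sketch of both call forms, not part of the patch:

// Sketch only: calling the updated constant GEP builders. Assumes the
// ArrayRef overloads used elsewhere in this patch.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Type.h"

llvm::Constant *firstElement(llvm::GlobalVariable *GV) {
  llvm::LLVMContext &Ctx = GV->getContext();
  llvm::Constant *Zero = llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 0);
  llvm::Constant *Idxs[] = {Zero, Zero};
  // Explicit element type, the form callers are being migrated towards:
  llvm::Type *EltTy = GV->getType()->getElementType();
  return llvm::ConstantExpr::getInBoundsGetElementPtr(EltTy, GV, Idxs);
  // Transitional equivalent: getInBoundsGetElementPtr(nullptr, GV, Idxs);
}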
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 613147e..7fe7beb 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -1153,8 +1153,8 @@ LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices, unsigned NumIndices) {
ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
NumIndices);
- return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal),
- IdxList));
+ return wrap(ConstantExpr::getGetElementPtr(
+ nullptr, unwrap<Constant>(ConstantVal), IdxList));
}
LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
@@ -1163,7 +1163,7 @@ LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
Constant* Val = unwrap<Constant>(ConstantVal);
ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
NumIndices);
- return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, IdxList));
+ return wrap(ConstantExpr::getInBoundsGetElementPtr(nullptr, Val, IdxList));
}
LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
@@ -2181,14 +2181,13 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
MDNode *Loc =
L ? cast<MDNode>(unwrap<MetadataAsValue>(L)->getMetadata()) : nullptr;
- unwrap(Builder)->SetCurrentDebugLocation(DebugLoc::getFromDILocation(Loc));
+ unwrap(Builder)->SetCurrentDebugLocation(DebugLoc(Loc));
}
LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) {
LLVMContext &Context = unwrap(Builder)->getContext();
return wrap(MetadataAsValue::get(
- Context,
- unwrap(Builder)->getCurrentDebugLocation().getAsMDNode(Context)));
+ Context, unwrap(Builder)->getCurrentDebugLocation().getAsMDNode()));
}
void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
@@ -2513,12 +2512,13 @@ LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name) {
ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
- return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), IdxList, Name));
+ return wrap(
+ unwrap(B)->CreateInBoundsGEP(nullptr, unwrap(Pointer), IdxList, Name));
}
LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
unsigned Idx, const char *Name) {
- return wrap(unwrap(B)->CreateStructGEP(unwrap(Pointer), Idx, Name));
+ return wrap(unwrap(B)->CreateStructGEP(nullptr, unwrap(Pointer), Idx, Name));
}
LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
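The C API wrappers keep their existing signatures and simply pass nullptr for the now-explicit element type, so bindings remain source-compatible. A sketch, not part of the patch:

// Sketch only: C API clients are unaffected by the typed-GEP migration.
#include "llvm-c/Core.h"

LLVMValueRef buildFieldPtr(LLVMBuilderRef B, LLVMValueRef StructPtr) {
  // Same call as before this patch; the struct type is still inferred
  // from StructPtr inside the wrapper.
  return LLVMBuildStructGEP(B, StructPtr, /*Idx=*/1, "field1");
}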
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 9677de4..891fb86 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -74,8 +74,7 @@ void DIBuilder::trackIfUnresolved(MDNode *N) {
}
void DIBuilder::finalize() {
- DIArray Enums = getOrCreateArray(AllEnumTypes);
- DIType(TempEnumTypes).replaceAllUsesWith(Enums);
+ TempEnumTypes->replaceAllUsesWith(MDTuple::get(VMContext, AllEnumTypes));
SmallVector<Metadata *, 16> RetainValues;
// Declarations and definitions of the same type may be retained. Some
@@ -86,28 +85,24 @@ void DIBuilder::finalize() {
for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++)
if (RetainSet.insert(AllRetainTypes[I]).second)
RetainValues.push_back(AllRetainTypes[I]);
- DIArray RetainTypes = getOrCreateArray(RetainValues);
- DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes);
-
- DIArray SPs = getOrCreateArray(AllSubprograms);
- DIType(TempSubprograms).replaceAllUsesWith(SPs);
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- if (MDNode *Temp = SP.getVariablesNodes()) {
+ TempRetainTypes->replaceAllUsesWith(MDTuple::get(VMContext, RetainValues));
+
+ MDSubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms);
+ TempSubprograms->replaceAllUsesWith(SPs.get());
+ for (auto *SP : SPs) {
+ if (MDTuple *Temp = SP->getVariables().get()) {
const auto &PV = PreservedVariables.lookup(SP);
SmallVector<Metadata *, 4> Variables(PV.begin(), PV.end());
DIArray AV = getOrCreateArray(Variables);
- DIType(Temp).replaceAllUsesWith(AV);
+ TempMDTuple(Temp)->replaceAllUsesWith(AV.get());
}
}
- DIArray GVs = getOrCreateArray(AllGVs);
- DIType(TempGVs).replaceAllUsesWith(GVs);
+ TempGVs->replaceAllUsesWith(MDTuple::get(VMContext, AllGVs));
- SmallVector<Metadata *, 16> RetainValuesI(AllImportedModules.begin(),
- AllImportedModules.end());
- DIArray IMs = getOrCreateArray(RetainValuesI);
- DIType(TempImportedModules).replaceAllUsesWith(IMs);
+ TempImportedModules->replaceAllUsesWith(MDTuple::get(
+ VMContext, SmallVector<Metadata *, 16>(AllImportedModules.begin(),
+ AllImportedModules.end())));
// Now that all temp nodes have been replaced or deleted, resolve remaining
// cycles.
@@ -121,19 +116,16 @@ void DIBuilder::finalize() {
}
/// If N is compile unit return NULL otherwise return N.
-static MDScope *getNonCompileUnitScope(MDNode *N) {
+static MDScope *getNonCompileUnitScope(MDScope *N) {
if (!N || isa<MDCompileUnit>(N))
return nullptr;
return cast<MDScope>(N);
}
-DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
- StringRef Directory,
- StringRef Producer, bool isOptimized,
- StringRef Flags, unsigned RunTimeVer,
- StringRef SplitName,
- DebugEmissionKind Kind,
- bool EmitDebugInfo) {
+MDCompileUnit *DIBuilder::createCompileUnit(
+ unsigned Lang, StringRef Filename, StringRef Directory, StringRef Producer,
+ bool isOptimized, StringRef Flags, unsigned RunTimeVer, StringRef SplitName,
+ DebugEmissionKind Kind, bool EmitDebugInfo) {
assert(((Lang <= dwarf::DW_LANG_Fortran08 && Lang >= dwarf::DW_LANG_C89) ||
(Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
@@ -143,18 +135,19 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
// TODO: Once we make MDCompileUnit distinct, stop using temporaries here
// (just start with operands assigned to nullptr).
- TempEnumTypes = MDTuple::getTemporary(VMContext, None).release();
- TempRetainTypes = MDTuple::getTemporary(VMContext, None).release();
- TempSubprograms = MDTuple::getTemporary(VMContext, None).release();
- TempGVs = MDTuple::getTemporary(VMContext, None).release();
- TempImportedModules = MDTuple::getTemporary(VMContext, None).release();
+ TempEnumTypes = MDTuple::getTemporary(VMContext, None);
+ TempRetainTypes = MDTuple::getTemporary(VMContext, None);
+ TempSubprograms = MDTuple::getTemporary(VMContext, None);
+ TempGVs = MDTuple::getTemporary(VMContext, None);
+ TempImportedModules = MDTuple::getTemporary(VMContext, None);
// TODO: Switch to getDistinct(). We never want to merge compile units based
// on contents.
- MDNode *CUNode = MDCompileUnit::get(
+ MDCompileUnit *CUNode = MDCompileUnit::get(
VMContext, Lang, MDFile::get(VMContext, Filename, Directory), Producer,
- isOptimized, Flags, RunTimeVer, SplitName, Kind, TempEnumTypes,
- TempRetainTypes, TempSubprograms, TempGVs, TempImportedModules);
+ isOptimized, Flags, RunTimeVer, SplitName, Kind, TempEnumTypes.get(),
+ TempRetainTypes.get(), TempSubprograms.get(), TempGVs.get(),
+ TempImportedModules.get());
// Create a named metadata so that it is easier to find cu in a module.
// Note that we only generate this when the caller wants to actually
@@ -167,141 +160,136 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
}
trackIfUnresolved(CUNode);
- return DICompileUnit(CUNode);
+ return CUNode;
}
-static DIImportedEntity
-createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope Context,
+static MDImportedEntity*
+createImportedModule(LLVMContext &C, dwarf::Tag Tag, MDScope* Context,
Metadata *NS, unsigned Line, StringRef Name,
SmallVectorImpl<TrackingMDNodeRef> &AllImportedModules) {
- DIImportedEntity M = MDImportedEntity::get(C, Tag, Context, NS, Line, Name);
- assert(M.Verify() && "Imported module should be valid");
- AllImportedModules.emplace_back(M.get());
+ auto *M =
+ MDImportedEntity::get(C, Tag, Context, DebugNodeRef(NS), Line, Name);
+ AllImportedModules.emplace_back(M);
return M;
}
-DIImportedEntity DIBuilder::createImportedModule(DIScope Context,
- DINameSpace NS,
+MDImportedEntity* DIBuilder::createImportedModule(MDScope* Context,
+ MDNamespace* NS,
unsigned Line) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
Context, NS, Line, StringRef(), AllImportedModules);
}
-DIImportedEntity DIBuilder::createImportedModule(DIScope Context,
- DIImportedEntity NS,
+MDImportedEntity* DIBuilder::createImportedModule(MDScope* Context,
+ MDImportedEntity* NS,
unsigned Line) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
Context, NS, Line, StringRef(), AllImportedModules);
}
-DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context,
- DIDescriptor Decl,
- unsigned Line, StringRef Name) {
+MDImportedEntity *DIBuilder::createImportedDeclaration(MDScope *Context,
+ DebugNode *Decl,
+ unsigned Line,
+ StringRef Name) {
// Make sure to use the unique identifier based metadata reference for
// types that have one.
- Metadata *V =
- Decl.isType() ? static_cast<Metadata *>(DIType(Decl).getRef()) : Decl;
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration,
- Context, V, Line, Name,
+ Context, DebugNodeRef::get(Decl), Line, Name,
AllImportedModules);
}
-DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context,
- DIImportedEntity Imp,
- unsigned Line, StringRef Name) {
- return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration,
- Context, Imp, Line, Name, AllImportedModules);
-}
-
-DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
+MDFile* DIBuilder::createFile(StringRef Filename, StringRef Directory) {
return MDFile::get(VMContext, Filename, Directory);
}
-DIEnumerator DIBuilder::createEnumerator(StringRef Name, int64_t Val) {
+MDEnumerator *DIBuilder::createEnumerator(StringRef Name, int64_t Val) {
assert(!Name.empty() && "Unable to create enumerator without name");
return MDEnumerator::get(VMContext, Val, Name);
}
-DIBasicType DIBuilder::createUnspecifiedType(StringRef Name) {
+MDBasicType *DIBuilder::createUnspecifiedType(StringRef Name) {
assert(!Name.empty() && "Unable to create type without name");
return MDBasicType::get(VMContext, dwarf::DW_TAG_unspecified_type, Name);
}
-DIBasicType DIBuilder::createNullPtrType() {
+MDBasicType *DIBuilder::createNullPtrType() {
return createUnspecifiedType("decltype(nullptr)");
}
-DIBasicType
-DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Encoding) {
+MDBasicType *DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ unsigned Encoding) {
assert(!Name.empty() && "Unable to create type without name");
return MDBasicType::get(VMContext, dwarf::DW_TAG_base_type, Name, SizeInBits,
AlignInBits, Encoding);
}
-DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
+MDDerivedType *DIBuilder::createQualifiedType(unsigned Tag, MDType *FromTy) {
return MDDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr,
- FromTy.getRef(), 0, 0, 0, 0);
+ MDTypeRef::get(FromTy), 0, 0, 0, 0);
}
-DIDerivedType
-DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
- uint64_t AlignInBits, StringRef Name) {
+MDDerivedType *DIBuilder::createPointerType(MDType *PointeeTy,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ StringRef Name) {
// FIXME: Why is there a name here?
return MDDerivedType::get(VMContext, dwarf::DW_TAG_pointer_type, Name,
- nullptr, 0, nullptr, PointeeTy.getRef(), SizeInBits,
- AlignInBits, 0, 0);
+ nullptr, 0, nullptr, MDTypeRef::get(PointeeTy),
+ SizeInBits, AlignInBits, 0, 0);
}
-DIDerivedType
-DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base,
- uint64_t SizeInBits, uint64_t AlignInBits) {
+MDDerivedType *DIBuilder::createMemberPointerType(MDType *PointeeTy,
+ MDType *Base,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits) {
return MDDerivedType::get(VMContext, dwarf::DW_TAG_ptr_to_member_type, "",
- nullptr, 0, nullptr, PointeeTy.getRef(), SizeInBits,
- AlignInBits, 0, 0, Base.getRef());
+ nullptr, 0, nullptr, MDTypeRef::get(PointeeTy),
+ SizeInBits, AlignInBits, 0, 0, MDTypeRef::get(Base));
}
-DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
- assert(RTy.isType() && "Unable to create reference type");
+MDDerivedType *DIBuilder::createReferenceType(unsigned Tag, MDType *RTy) {
+ assert(RTy && "Unable to create reference type");
return MDDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr,
- RTy.getRef(), 0, 0, 0, 0);
+ MDTypeRef::get(RTy), 0, 0, 0, 0);
}
-DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
- unsigned LineNo, DIDescriptor Context) {
- return MDDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name,
- File.getFileNode(), LineNo,
- DIScope(getNonCompileUnitScope(Context)).getRef(),
- Ty.getRef(), 0, 0, 0, 0);
+MDDerivedType *DIBuilder::createTypedef(MDType *Ty, StringRef Name,
+ MDFile *File, unsigned LineNo,
+ MDScope *Context) {
+ return MDDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File,
+ LineNo,
+ MDScopeRef::get(getNonCompileUnitScope(Context)),
+ MDTypeRef::get(Ty), 0, 0, 0, 0);
}
-DIDerivedType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
- // typedefs are encoded in DIDerivedType format.
- assert(Ty.isType() && "Invalid type!");
- assert(FriendTy.isType() && "Invalid friend type!");
+MDDerivedType *DIBuilder::createFriend(MDType *Ty, MDType *FriendTy) {
+ assert(Ty && "Invalid type!");
+ assert(FriendTy && "Invalid friend type!");
return MDDerivedType::get(VMContext, dwarf::DW_TAG_friend, "", nullptr, 0,
- Ty.getRef(), FriendTy.getRef(), 0, 0, 0, 0);
+ MDTypeRef::get(Ty), MDTypeRef::get(FriendTy), 0, 0,
+ 0, 0);
}
-DIDerivedType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
- uint64_t BaseOffset,
- unsigned Flags) {
- assert(Ty.isType() && "Unable to create inheritance");
+MDDerivedType *DIBuilder::createInheritance(MDType *Ty, MDType *BaseTy,
+ uint64_t BaseOffset,
+ unsigned Flags) {
+ assert(Ty && "Unable to create inheritance");
return MDDerivedType::get(VMContext, dwarf::DW_TAG_inheritance, "", nullptr,
- 0, Ty.getRef(), BaseTy.getRef(), 0, 0, BaseOffset,
- Flags);
+ 0, MDTypeRef::get(Ty), MDTypeRef::get(BaseTy), 0, 0,
+ BaseOffset, Flags);
}
-DIDerivedType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits,
- uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType Ty) {
+MDDerivedType *DIBuilder::createMemberType(MDScope *Scope, StringRef Name,
+ MDFile *File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ uint64_t OffsetInBits,
+ unsigned Flags, MDType *Ty) {
return MDDerivedType::get(
VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
- DIScope(getNonCompileUnitScope(Scope)).getRef(), Ty.getRef(), SizeInBits,
- AlignInBits, OffsetInBits, Flags);
+ MDScopeRef::get(getNonCompileUnitScope(Scope)), MDTypeRef::get(Ty),
+ SizeInBits, AlignInBits, OffsetInBits, Flags);
}
static ConstantAsMetadata *getConstantOrNull(Constant *C) {
@@ -310,135 +298,124 @@ static ConstantAsMetadata *getConstantOrNull(Constant *C) {
return nullptr;
}
-DIDerivedType DIBuilder::createStaticMemberType(DIDescriptor Scope,
- StringRef Name, DIFile File,
- unsigned LineNumber, DIType Ty,
- unsigned Flags,
- llvm::Constant *Val) {
- // TAG_member is encoded in DIDerivedType format.
- Flags |= DIDescriptor::FlagStaticMember;
+MDDerivedType *DIBuilder::createStaticMemberType(MDScope *Scope, StringRef Name,
+ MDFile *File,
+ unsigned LineNumber,
+ MDType *Ty, unsigned Flags,
+ llvm::Constant *Val) {
+ Flags |= DebugNode::FlagStaticMember;
return MDDerivedType::get(
VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
- DIScope(getNonCompileUnitScope(Scope)).getRef(), Ty.getRef(), 0, 0, 0,
- Flags, getConstantOrNull(Val));
+ MDScopeRef::get(getNonCompileUnitScope(Scope)), MDTypeRef::get(Ty), 0, 0,
+ 0, Flags, getConstantOrNull(Val));
}
-DIDerivedType DIBuilder::createObjCIVar(StringRef Name, DIFile File,
- unsigned LineNumber,
- uint64_t SizeInBits,
- uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType Ty, MDNode *PropertyNode) {
- return MDDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
- LineNumber, getNonCompileUnitScope(File),
- Ty.getRef(), SizeInBits, AlignInBits, OffsetInBits,
- Flags, PropertyNode);
+MDDerivedType *DIBuilder::createObjCIVar(StringRef Name, MDFile *File,
+ unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ MDType *Ty, MDNode *PropertyNode) {
+ return MDDerivedType::get(
+ VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
+ MDScopeRef::get(getNonCompileUnitScope(File)), MDTypeRef::get(Ty),
+ SizeInBits, AlignInBits, OffsetInBits, Flags, PropertyNode);
}
-DIObjCProperty
-DIBuilder::createObjCProperty(StringRef Name, DIFile File, unsigned LineNumber,
+MDObjCProperty *
+DIBuilder::createObjCProperty(StringRef Name, MDFile *File, unsigned LineNumber,
StringRef GetterName, StringRef SetterName,
- unsigned PropertyAttributes, DIType Ty) {
+ unsigned PropertyAttributes, MDType *Ty) {
return MDObjCProperty::get(VMContext, Name, File, LineNumber, GetterName,
SetterName, PropertyAttributes, Ty);
}
-DITemplateTypeParameter
-DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
- DIType Ty) {
- assert(!DIScope(getNonCompileUnitScope(Context)).getRef() &&
- "Expected compile unit");
- return MDTemplateTypeParameter::get(VMContext, Name, Ty.getRef());
+MDTemplateTypeParameter *
+DIBuilder::createTemplateTypeParameter(MDScope *Context, StringRef Name,
+ MDType *Ty) {
+ assert((!Context || isa<MDCompileUnit>(Context)) && "Expected compile unit");
+ return MDTemplateTypeParameter::get(VMContext, Name, MDTypeRef::get(Ty));
}
-static DITemplateValueParameter
+static MDTemplateValueParameter *
createTemplateValueParameterHelper(LLVMContext &VMContext, unsigned Tag,
- DIDescriptor Context, StringRef Name,
- DIType Ty, Metadata *MD) {
- assert(!DIScope(getNonCompileUnitScope(Context)).getRef() &&
- "Expected compile unit");
- return MDTemplateValueParameter::get(VMContext, Tag, Name, Ty.getRef(), MD);
+ MDScope *Context, StringRef Name, MDType *Ty,
+ Metadata *MD) {
+ assert((!Context || isa<MDCompileUnit>(Context)) && "Expected compile unit");
+ return MDTemplateValueParameter::get(VMContext, Tag, Name, MDTypeRef::get(Ty),
+ MD);
}
-DITemplateValueParameter
-DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
- DIType Ty, Constant *Val) {
+MDTemplateValueParameter *
+DIBuilder::createTemplateValueParameter(MDScope *Context, StringRef Name,
+ MDType *Ty, Constant *Val) {
return createTemplateValueParameterHelper(
VMContext, dwarf::DW_TAG_template_value_parameter, Context, Name, Ty,
getConstantOrNull(Val));
}
-DITemplateValueParameter
-DIBuilder::createTemplateTemplateParameter(DIDescriptor Context, StringRef Name,
- DIType Ty, StringRef Val) {
+MDTemplateValueParameter *
+DIBuilder::createTemplateTemplateParameter(MDScope *Context, StringRef Name,
+ MDType *Ty, StringRef Val) {
return createTemplateValueParameterHelper(
VMContext, dwarf::DW_TAG_GNU_template_template_param, Context, Name, Ty,
MDString::get(VMContext, Val));
}
-DITemplateValueParameter
-DIBuilder::createTemplateParameterPack(DIDescriptor Context, StringRef Name,
- DIType Ty, DIArray Val) {
+MDTemplateValueParameter *
+DIBuilder::createTemplateParameterPack(MDScope *Context, StringRef Name,
+ MDType *Ty, DIArray Val) {
return createTemplateValueParameterHelper(
VMContext, dwarf::DW_TAG_GNU_template_parameter_pack, Context, Name, Ty,
- Val);
+ Val.get());
}
-DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits,
- uint64_t AlignInBits,
- uint64_t OffsetInBits,
- unsigned Flags, DIType DerivedFrom,
- DIArray Elements,
- DIType VTableHolder,
- MDNode *TemplateParams,
- StringRef UniqueIdentifier) {
- assert((!Context || Context.isScope() || Context.isType()) &&
+MDCompositeType *DIBuilder::createClassType(
+ MDScope *Context, StringRef Name, MDFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
+ unsigned Flags, MDType *DerivedFrom, DIArray Elements, MDType *VTableHolder,
+ MDNode *TemplateParams, StringRef UniqueIdentifier) {
+ assert((!Context || isa<MDScope>(Context)) &&
"createClassType should be called with a valid Context");
- // TAG_class_type is encoded in DICompositeType format.
- DICompositeType R = MDCompositeType::get(
+
+ auto *R = MDCompositeType::get(
VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber,
- DIScope(getNonCompileUnitScope(Context)).getRef(), DerivedFrom.getRef(),
- SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, 0,
- VTableHolder.getRef(), TemplateParams, UniqueIdentifier);
+ MDScopeRef::get(getNonCompileUnitScope(Context)),
+ MDTypeRef::get(DerivedFrom), SizeInBits, AlignInBits, OffsetInBits, Flags,
+ Elements, 0, MDTypeRef::get(VTableHolder),
+ cast_or_null<MDTuple>(TemplateParams), UniqueIdentifier);
if (!UniqueIdentifier.empty())
retainType(R);
trackIfUnresolved(R);
return R;
}
-DICompositeType DIBuilder::createStructType(DIDescriptor Context,
- StringRef Name, DIFile File,
- unsigned LineNumber,
- uint64_t SizeInBits,
- uint64_t AlignInBits,
- unsigned Flags, DIType DerivedFrom,
- DIArray Elements,
- unsigned RunTimeLang,
- DIType VTableHolder,
- StringRef UniqueIdentifier) {
- DICompositeType R = MDCompositeType::get(
+MDCompositeType *DIBuilder::createStructType(
+ MDScope *Context, StringRef Name, MDFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags,
+ MDType *DerivedFrom, DIArray Elements, unsigned RunTimeLang,
+ MDType *VTableHolder, StringRef UniqueIdentifier) {
+ auto *R = MDCompositeType::get(
VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber,
- DIScope(getNonCompileUnitScope(Context)).getRef(), DerivedFrom.getRef(),
- SizeInBits, AlignInBits, 0, Flags, Elements, RunTimeLang,
- VTableHolder.getRef(), nullptr, UniqueIdentifier);
+ MDScopeRef::get(getNonCompileUnitScope(Context)),
+ MDTypeRef::get(DerivedFrom), SizeInBits, AlignInBits, 0, Flags, Elements,
+ RunTimeLang, MDTypeRef::get(VTableHolder), nullptr, UniqueIdentifier);
if (!UniqueIdentifier.empty())
retainType(R);
trackIfUnresolved(R);
return R;
}
-DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber,
+MDCompositeType* DIBuilder::createUnionType(MDScope * Scope, StringRef Name,
+ MDFile* File, unsigned LineNumber,
uint64_t SizeInBits,
uint64_t AlignInBits, unsigned Flags,
DIArray Elements,
unsigned RunTimeLang,
StringRef UniqueIdentifier) {
- DICompositeType R = MDCompositeType::get(
+ auto *R = MDCompositeType::get(
VMContext, dwarf::DW_TAG_union_type, Name, File, LineNumber,
- DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits,
+ MDScopeRef::get(getNonCompileUnitScope(Scope)), nullptr, SizeInBits,
AlignInBits, 0, Flags, Elements, RunTimeLang, nullptr, nullptr,
UniqueIdentifier);
if (!UniqueIdentifier.empty())
@@ -447,21 +424,21 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
return R;
}
-DISubroutineType DIBuilder::createSubroutineType(DIFile File,
- DITypeArray ParameterTypes,
- unsigned Flags) {
+MDSubroutineType *DIBuilder::createSubroutineType(MDFile *File,
+ DITypeArray ParameterTypes,
+ unsigned Flags) {
return MDSubroutineType::get(VMContext, Flags, ParameterTypes);
}
-DICompositeType DIBuilder::createEnumerationType(
- DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
+MDCompositeType *DIBuilder::createEnumerationType(
+ MDScope *Scope, StringRef Name, MDFile *File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
- DIType UnderlyingType, StringRef UniqueIdentifier) {
- DICompositeType CTy = MDCompositeType::get(
+ MDType *UnderlyingType, StringRef UniqueIdentifier) {
+ auto *CTy = MDCompositeType::get(
VMContext, dwarf::DW_TAG_enumeration_type, Name, File, LineNumber,
- DIScope(getNonCompileUnitScope(Scope)).getRef(), UnderlyingType.getRef(),
- SizeInBits, AlignInBits, 0, 0, Elements, 0, nullptr, nullptr,
- UniqueIdentifier);
+ MDScopeRef::get(getNonCompileUnitScope(Scope)),
+ MDTypeRef::get(UnderlyingType), SizeInBits, AlignInBits, 0, 0, Elements,
+ 0, nullptr, nullptr, UniqueIdentifier);
AllEnumTypes.push_back(CTy);
if (!UniqueIdentifier.empty())
retainType(CTy);
@@ -469,63 +446,66 @@ DICompositeType DIBuilder::createEnumerationType(
return CTy;
}
-DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
- DIType Ty, DIArray Subscripts) {
+MDCompositeType *DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
+ MDType *Ty, DIArray Subscripts) {
auto *R = MDCompositeType::get(VMContext, dwarf::DW_TAG_array_type, "",
- nullptr, 0, nullptr, Ty.getRef(), Size,
+ nullptr, 0, nullptr, MDTypeRef::get(Ty), Size,
AlignInBits, 0, 0, Subscripts, 0, nullptr);
trackIfUnresolved(R);
return R;
}
-DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
- DIType Ty, DIArray Subscripts) {
- auto *R = MDCompositeType::get(
- VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0, nullptr, Ty.getRef(),
- Size, AlignInBits, 0, DIType::FlagVector, Subscripts, 0, nullptr);
+MDCompositeType *DIBuilder::createVectorType(uint64_t Size,
+ uint64_t AlignInBits, MDType *Ty,
+ DIArray Subscripts) {
+ auto *R =
+ MDCompositeType::get(VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0,
+ nullptr, MDTypeRef::get(Ty), Size, AlignInBits, 0,
+ DebugNode::FlagVector, Subscripts, 0, nullptr);
trackIfUnresolved(R);
return R;
}
-static DIType createTypeWithFlags(LLVMContext &Context, DIType Ty,
- unsigned FlagsToSet) {
- TempMDType NewTy = cast<MDType>(static_cast<MDNode *>(Ty))->clone();
+static MDType *createTypeWithFlags(LLVMContext &Context, MDType *Ty,
+ unsigned FlagsToSet) {
+ auto NewTy = Ty->clone();
NewTy->setFlags(NewTy->getFlags() | FlagsToSet);
return MDNode::replaceWithUniqued(std::move(NewTy));
}
-DIType DIBuilder::createArtificialType(DIType Ty) {
+MDType *DIBuilder::createArtificialType(MDType *Ty) {
// FIXME: Restrict this to the nodes where it's valid.
- if (Ty.isArtificial())
+ if (Ty->isArtificial())
return Ty;
- return createTypeWithFlags(VMContext, Ty, DIType::FlagArtificial);
+ return createTypeWithFlags(VMContext, Ty, DebugNode::FlagArtificial);
}
-DIType DIBuilder::createObjectPointerType(DIType Ty) {
+MDType *DIBuilder::createObjectPointerType(MDType *Ty) {
// FIXME: Restrict this to the nodes where it's valid.
- if (Ty.isObjectPointer())
+ if (Ty->isObjectPointer())
return Ty;
- unsigned Flags = DIType::FlagObjectPointer | DIType::FlagArtificial;
+ unsigned Flags = DebugNode::FlagObjectPointer | DebugNode::FlagArtificial;
return createTypeWithFlags(VMContext, Ty, Flags);
}
-void DIBuilder::retainType(DIType T) { AllRetainTypes.emplace_back(T); }
-
-DIBasicType DIBuilder::createUnspecifiedParameter() {
- return DIBasicType();
+void DIBuilder::retainType(MDType *T) {
+ assert(T && "Expected non-null type");
+ AllRetainTypes.emplace_back(T);
}
-DICompositeType
-DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope,
- DIFile F, unsigned Line, unsigned RuntimeLang,
+MDBasicType *DIBuilder::createUnspecifiedParameter() { return nullptr; }
+
+MDCompositeType*
+DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, MDScope * Scope,
+ MDFile* F, unsigned Line, unsigned RuntimeLang,
uint64_t SizeInBits, uint64_t AlignInBits,
StringRef UniqueIdentifier) {
// FIXME: Define in terms of createReplaceableForwardDecl() by calling
// replaceWithUniqued().
- DICompositeType RetTy = MDCompositeType::get(
- VMContext, Tag, Name, F.getFileNode(), Line,
- DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits,
- AlignInBits, 0, DIDescriptor::FlagFwdDecl, nullptr, RuntimeLang, nullptr,
+ auto *RetTy = MDCompositeType::get(
+ VMContext, Tag, Name, F, Line,
+ MDScopeRef::get(getNonCompileUnitScope(Scope)), nullptr, SizeInBits,
+ AlignInBits, 0, DebugNode::FlagFwdDecl, nullptr, RuntimeLang, nullptr,
nullptr, UniqueIdentifier);
if (!UniqueIdentifier.empty())
retainType(RetTy);
@@ -533,16 +513,15 @@ DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope,
return RetTy;
}
-DICompositeType DIBuilder::createReplaceableCompositeType(
- unsigned Tag, StringRef Name, DIDescriptor Scope, DIFile F, unsigned Line,
+MDCompositeType* DIBuilder::createReplaceableCompositeType(
+ unsigned Tag, StringRef Name, MDScope * Scope, MDFile* F, unsigned Line,
unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits,
unsigned Flags, StringRef UniqueIdentifier) {
- DICompositeType RetTy =
- MDCompositeType::getTemporary(
- VMContext, Tag, Name, F.getFileNode(), Line,
- DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits,
- AlignInBits, 0, Flags, nullptr, RuntimeLang,
- nullptr, nullptr, UniqueIdentifier).release();
+ auto *RetTy = MDCompositeType::getTemporary(
+ VMContext, Tag, Name, F, Line,
+ MDScopeRef::get(getNonCompileUnitScope(Scope)), nullptr,
+ SizeInBits, AlignInBits, 0, Flags, nullptr, RuntimeLang,
+ nullptr, nullptr, UniqueIdentifier).release();
if (!UniqueIdentifier.empty())
retainType(RetTy);
trackIfUnresolved(RetTy);
@@ -550,102 +529,102 @@ DICompositeType DIBuilder::createReplaceableCompositeType(
}
DIArray DIBuilder::getOrCreateArray(ArrayRef<Metadata *> Elements) {
- return DIArray(MDNode::get(VMContext, Elements));
+ return MDTuple::get(VMContext, Elements);
}
DITypeArray DIBuilder::getOrCreateTypeArray(ArrayRef<Metadata *> Elements) {
SmallVector<llvm::Metadata *, 16> Elts;
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
if (Elements[i] && isa<MDNode>(Elements[i]))
- Elts.push_back(DIType(cast<MDNode>(Elements[i])).getRef());
+ Elts.push_back(MDTypeRef::get(cast<MDType>(Elements[i])));
else
Elts.push_back(Elements[i]);
}
return DITypeArray(MDNode::get(VMContext, Elts));
}
-DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
+MDSubrange *DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
return MDSubrange::get(VMContext, Count, Lo);
}
-static void checkGlobalVariableScope(DIDescriptor Context) {
- MDNode *TheCtx = getNonCompileUnitScope(Context);
- if (DIScope(TheCtx).isCompositeType()) {
- assert(!DICompositeType(TheCtx).getIdentifier() &&
+static void checkGlobalVariableScope(MDScope * Context) {
+#ifndef NDEBUG
+ if (auto *CT =
+ dyn_cast_or_null<MDCompositeType>(getNonCompileUnitScope(Context)))
+ assert(CT->getIdentifier().empty() &&
"Context of a global variable should not be a type with identifier");
- }
+#endif
}
-DIGlobalVariable DIBuilder::createGlobalVariable(
- DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F,
- unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val,
+MDGlobalVariable *DIBuilder::createGlobalVariable(
+ MDScope *Context, StringRef Name, StringRef LinkageName, MDFile *F,
+ unsigned LineNumber, MDType *Ty, bool isLocalToUnit, Constant *Val,
MDNode *Decl) {
checkGlobalVariableScope(Context);
- auto *N = MDGlobalVariable::get(VMContext, Context, Name, LinkageName, F,
- LineNumber, Ty, isLocalToUnit, true,
- getConstantOrNull(Val), Decl);
+ auto *N = MDGlobalVariable::get(VMContext, cast_or_null<MDScope>(Context),
+ Name, LinkageName, F, LineNumber,
+ MDTypeRef::get(Ty), isLocalToUnit, true, Val,
+ cast_or_null<MDDerivedType>(Decl));
AllGVs.push_back(N);
return N;
}
-DIGlobalVariable DIBuilder::createTempGlobalVariableFwdDecl(
- DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F,
- unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val,
+MDGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl(
+ MDScope *Context, StringRef Name, StringRef LinkageName, MDFile *F,
+ unsigned LineNumber, MDType *Ty, bool isLocalToUnit, Constant *Val,
MDNode *Decl) {
checkGlobalVariableScope(Context);
- return MDGlobalVariable::getTemporary(VMContext, Context, Name, LinkageName,
- F, LineNumber, Ty, isLocalToUnit, false,
- getConstantOrNull(Val), Decl).release();
+ return MDGlobalVariable::getTemporary(
+ VMContext, cast_or_null<MDScope>(Context), Name, LinkageName, F,
+ LineNumber, MDTypeRef::get(Ty), isLocalToUnit, false, Val,
+ cast_or_null<MDDerivedType>(Decl))
+ .release();
}
-DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
- StringRef Name, DIFile File,
- unsigned LineNo, DITypeRef Ty,
- bool AlwaysPreserve, unsigned Flags,
- unsigned ArgNo) {
+MDLocalVariable *DIBuilder::createLocalVariable(
+ unsigned Tag, MDScope *Scope, StringRef Name, MDFile *File, unsigned LineNo,
+ MDType *Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) {
// FIXME: Why getNonCompileUnitScope()?
// FIXME: Why is "!Context" okay here?
// FIXME: WHy doesn't this check for a subprogram or lexical block (AFAICT
// the only valid scopes)?
- DIDescriptor Context(getNonCompileUnitScope(Scope));
- assert((!Context || Context.isScope()) &&
- "createLocalVariable should be called with a valid Context");
+ MDScope* Context = getNonCompileUnitScope(Scope);
- auto *Node =
- MDLocalVariable::get(VMContext, Tag, getNonCompileUnitScope(Scope), Name,
- File, LineNo, Ty, ArgNo, Flags);
+ auto *Node = MDLocalVariable::get(
+ VMContext, Tag, cast_or_null<MDLocalScope>(Context), Name, File, LineNo,
+ MDTypeRef::get(Ty), ArgNo, Flags);
if (AlwaysPreserve) {
// The optimizer may remove local variable. If there is an interest
// to preserve variable info in such situation then stash it in a
// named mdnode.
- DISubprogram Fn(getDISubprogram(Scope));
+ MDSubprogram *Fn = getDISubprogram(Scope);
assert(Fn && "Missing subprogram for local variable");
PreservedVariables[Fn].emplace_back(Node);
}
return Node;
}
-DIExpression DIBuilder::createExpression(ArrayRef<uint64_t> Addr) {
+MDExpression* DIBuilder::createExpression(ArrayRef<uint64_t> Addr) {
return MDExpression::get(VMContext, Addr);
}
-DIExpression DIBuilder::createExpression(ArrayRef<int64_t> Signed) {
+MDExpression* DIBuilder::createExpression(ArrayRef<int64_t> Signed) {
// TODO: Remove the callers of this signed version and delete.
SmallVector<uint64_t, 8> Addr(Signed.begin(), Signed.end());
return createExpression(Addr);
}
-DIExpression DIBuilder::createBitPieceExpression(unsigned OffsetInBytes,
+MDExpression* DIBuilder::createBitPieceExpression(unsigned OffsetInBytes,
unsigned SizeInBytes) {
uint64_t Addr[] = {dwarf::DW_OP_bit_piece, OffsetInBytes, SizeInBytes};
return MDExpression::get(VMContext, Addr);
}
-DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
- StringRef LinkageName, DIFile File,
- unsigned LineNo, DICompositeType Ty,
+MDSubprogram* DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
+ StringRef LinkageName, MDFile* File,
+ unsigned LineNo, MDSubroutineType* Ty,
bool isLocalToUnit, bool isDefinition,
unsigned ScopeLine, unsigned Flags,
bool isOptimized, Function *Fn,
@@ -658,20 +637,21 @@ DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
Flags, isOptimized, Fn, TParams, Decl);
}
-DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile File,
- unsigned LineNo, DICompositeType Ty,
+MDSubprogram* DIBuilder::createFunction(MDScope * Context, StringRef Name,
+ StringRef LinkageName, MDFile* File,
+ unsigned LineNo, MDSubroutineType* Ty,
bool isLocalToUnit, bool isDefinition,
unsigned ScopeLine, unsigned Flags,
bool isOptimized, Function *Fn,
MDNode *TParams, MDNode *Decl) {
- assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type &&
+ assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type &&
"function types should be subroutines");
auto *Node = MDSubprogram::get(
- VMContext, DIScope(getNonCompileUnitScope(Context)).getRef(), Name,
- LinkageName, File.getFileNode(), LineNo, Ty, isLocalToUnit, isDefinition,
- ScopeLine, nullptr, 0, 0, Flags, isOptimized, getConstantOrNull(Fn),
- TParams, Decl, MDNode::getTemporary(VMContext, None).release());
+ VMContext, MDScopeRef::get(getNonCompileUnitScope(Context)), Name,
+ LinkageName, File, LineNo, Ty,
+ isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, Flags, isOptimized,
+ Fn, cast_or_null<MDTuple>(TParams), cast_or_null<MDSubprogram>(Decl),
+ MDTuple::getTemporary(VMContext, None).release());
if (isDefinition)
AllSubprograms.push_back(Node);
@@ -679,78 +659,64 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name,
return Node;
}
-DISubprogram
-DIBuilder::createTempFunctionFwdDecl(DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile File,
- unsigned LineNo, DICompositeType Ty,
+MDSubprogram*
+DIBuilder::createTempFunctionFwdDecl(MDScope * Context, StringRef Name,
+ StringRef LinkageName, MDFile* File,
+ unsigned LineNo, MDSubroutineType* Ty,
bool isLocalToUnit, bool isDefinition,
unsigned ScopeLine, unsigned Flags,
bool isOptimized, Function *Fn,
MDNode *TParams, MDNode *Decl) {
return MDSubprogram::getTemporary(
- VMContext, DIScope(getNonCompileUnitScope(Context)).getRef(), Name,
- LinkageName, File.getFileNode(), LineNo, Ty, isLocalToUnit,
- isDefinition, ScopeLine, nullptr, 0, 0, Flags, isOptimized,
- getConstantOrNull(Fn), TParams, Decl, nullptr).release();
-}
-
-DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile F,
- unsigned LineNo, DICompositeType Ty,
- bool isLocalToUnit, bool isDefinition,
- unsigned VK, unsigned VIndex,
- DIType VTableHolder, unsigned Flags,
- bool isOptimized, Function *Fn,
- MDNode *TParam) {
- assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type &&
+ VMContext, MDScopeRef::get(getNonCompileUnitScope(Context)), Name,
+ LinkageName, File, LineNo, Ty,
+ isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, Flags,
+ isOptimized, Fn, cast_or_null<MDTuple>(TParams),
+ cast_or_null<MDSubprogram>(Decl), nullptr).release();
+}
+
+MDSubprogram *
+DIBuilder::createMethod(MDScope *Context, StringRef Name, StringRef LinkageName,
+ MDFile *F, unsigned LineNo, MDSubroutineType *Ty,
+ bool isLocalToUnit, bool isDefinition, unsigned VK,
+ unsigned VIndex, MDType *VTableHolder, unsigned Flags,
+ bool isOptimized, Function *Fn, MDNode *TParam) {
+ assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type &&
"function types should be subroutines");
assert(getNonCompileUnitScope(Context) &&
"Methods should have both a Context and a context that isn't "
"the compile unit.");
// FIXME: Do we want to use different scope/lines?
- auto *Node = MDSubprogram::get(
- VMContext, DIScope(Context).getRef(), Name, LinkageName, F.getFileNode(),
- LineNo, Ty, isLocalToUnit, isDefinition, LineNo, VTableHolder.getRef(),
- VK, VIndex, Flags, isOptimized, getConstantOrNull(Fn), TParam, nullptr,
- nullptr);
+ auto *SP = MDSubprogram::get(
+ VMContext, MDScopeRef::get(cast<MDScope>(Context)), Name, LinkageName, F,
+ LineNo, Ty, isLocalToUnit, isDefinition, LineNo,
+ MDTypeRef::get(VTableHolder), VK, VIndex, Flags, isOptimized, Fn,
+ cast_or_null<MDTuple>(TParam), nullptr, nullptr);
if (isDefinition)
- AllSubprograms.push_back(Node);
- DISubprogram S(Node);
- assert(S.isSubprogram() && "createMethod should return a valid DISubprogram");
- trackIfUnresolved(S);
- return S;
-}
-
-DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNo) {
- DINameSpace R = MDNamespace::get(VMContext, getNonCompileUnitScope(Scope),
- File.getFileNode(), Name, LineNo);
- assert(R.Verify() &&
- "createNameSpace should return a verifiable DINameSpace");
- return R;
+ AllSubprograms.push_back(SP);
+ trackIfUnresolved(SP);
+ return SP;
+}
+
+MDNamespace* DIBuilder::createNameSpace(MDScope * Scope, StringRef Name,
+ MDFile* File, unsigned LineNo) {
+ return MDNamespace::get(VMContext, getNonCompileUnitScope(Scope), File, Name,
+ LineNo);
}
-DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
- DIFile File,
+MDLexicalBlockFile* DIBuilder::createLexicalBlockFile(MDScope * Scope,
+ MDFile* File,
unsigned Discriminator) {
- DILexicalBlockFile R = MDLexicalBlockFile::get(
- VMContext, Scope, File.getFileNode(), Discriminator);
- assert(
- R.Verify() &&
- "createLexicalBlockFile should return a verifiable DILexicalBlockFile");
- return R;
+ return MDLexicalBlockFile::get(VMContext, Scope, File, Discriminator);
}
-DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
+MDLexicalBlock* DIBuilder::createLexicalBlock(MDScope * Scope, MDFile* File,
unsigned Line, unsigned Col) {
// Make these distinct, to avoid merging two lexical blocks on the same
// file/line/column.
- DILexicalBlock R = MDLexicalBlock::getDistinct(
- VMContext, getNonCompileUnitScope(Scope), File.getFileNode(), Line, Col);
- assert(R.Verify() &&
- "createLexicalBlock should return a verifiable DILexicalBlock");
- return R;
+ return MDLexicalBlock::getDistinct(VMContext, getNonCompileUnitScope(Scope),
+ File, Line, Col);
}
static Value *getDbgIntrinsicValueImpl(LLVMContext &VMContext, Value *V) {
@@ -758,11 +724,19 @@ static Value *getDbgIntrinsicValueImpl(LLVMContext &VMContext, Value *V) {
return MetadataAsValue::get(VMContext, ValueAsMetadata::get(V));
}
-Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
- DIExpression Expr,
+static Instruction *withDebugLoc(Instruction *I, const MDLocation *DL) {
+ I->setDebugLoc(const_cast<MDLocation *>(DL));
+ return I;
+}
+
+Instruction *DIBuilder::insertDeclare(Value *Storage, MDLocalVariable* VarInfo,
+ MDExpression* Expr, const MDLocation *DL,
Instruction *InsertBefore) {
- assert(VarInfo.isVariable() &&
- "empty or invalid DIVariable passed to dbg.declare");
+ assert(VarInfo && "empty or invalid MDLocalVariable* passed to dbg.declare");
+ assert(DL && "Expected debug loc");
+ assert(DL->getScope()->getSubprogram() ==
+ VarInfo->getScope()->getSubprogram() &&
+ "Expected matching subprograms");
if (!DeclareFn)
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
@@ -771,14 +745,17 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
Value *Args[] = {getDbgIntrinsicValueImpl(VMContext, Storage),
MetadataAsValue::get(VMContext, VarInfo),
MetadataAsValue::get(VMContext, Expr)};
- return CallInst::Create(DeclareFn, Args, "", InsertBefore);
+ return withDebugLoc(CallInst::Create(DeclareFn, Args, "", InsertBefore), DL);
}
-Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
- DIExpression Expr,
+Instruction *DIBuilder::insertDeclare(Value *Storage, MDLocalVariable* VarInfo,
+ MDExpression* Expr, const MDLocation *DL,
BasicBlock *InsertAtEnd) {
- assert(VarInfo.isVariable() &&
- "empty or invalid DIVariable passed to dbg.declare");
+ assert(VarInfo && "empty or invalid MDLocalVariable* passed to dbg.declare");
+ assert(DL && "Expected debug loc");
+ assert(DL->getScope()->getSubprogram() ==
+ VarInfo->getScope()->getSubprogram() &&
+ "Expected matching subprograms");
if (!DeclareFn)
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
@@ -791,18 +768,21 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
// If this block already has a terminator then insert this intrinsic
// before the terminator.
if (TerminatorInst *T = InsertAtEnd->getTerminator())
- return CallInst::Create(DeclareFn, Args, "", T);
- else
- return CallInst::Create(DeclareFn, Args, "", InsertAtEnd);
+ return withDebugLoc(CallInst::Create(DeclareFn, Args, "", T), DL);
+ return withDebugLoc(CallInst::Create(DeclareFn, Args, "", InsertAtEnd), DL);
}
Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
- DIVariable VarInfo,
- DIExpression Expr,
+ MDLocalVariable* VarInfo,
+ MDExpression* Expr,
+ const MDLocation *DL,
Instruction *InsertBefore) {
assert(V && "no value passed to dbg.value");
- assert(VarInfo.isVariable() &&
- "empty or invalid DIVariable passed to dbg.value");
+ assert(VarInfo && "empty or invalid MDLocalVariable* passed to dbg.value");
+ assert(DL && "Expected debug loc");
+ assert(DL->getScope()->getSubprogram() ==
+ VarInfo->getScope()->getSubprogram() &&
+ "Expected matching subprograms");
if (!ValueFn)
ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
@@ -812,16 +792,20 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
ConstantInt::get(Type::getInt64Ty(VMContext), Offset),
MetadataAsValue::get(VMContext, VarInfo),
MetadataAsValue::get(VMContext, Expr)};
- return CallInst::Create(ValueFn, Args, "", InsertBefore);
+ return withDebugLoc(CallInst::Create(ValueFn, Args, "", InsertBefore), DL);
}
Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
- DIVariable VarInfo,
- DIExpression Expr,
+ MDLocalVariable* VarInfo,
+ MDExpression* Expr,
+ const MDLocation *DL,
BasicBlock *InsertAtEnd) {
assert(V && "no value passed to dbg.value");
- assert(VarInfo.isVariable() &&
- "empty or invalid DIVariable passed to dbg.value");
+ assert(VarInfo && "empty or invalid MDLocalVariable* passed to dbg.value");
+ assert(DL && "Expected debug loc");
+ assert(DL->getScope()->getSubprogram() ==
+ VarInfo->getScope()->getSubprogram() &&
+ "Expected matching subprograms");
if (!ValueFn)
ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
@@ -831,11 +815,16 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
ConstantInt::get(Type::getInt64Ty(VMContext), Offset),
MetadataAsValue::get(VMContext, VarInfo),
MetadataAsValue::get(VMContext, Expr)};
- return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
+
+ return withDebugLoc(CallInst::Create(ValueFn, Args, "", InsertAtEnd), DL);
}
-void DIBuilder::replaceVTableHolder(DICompositeType &T, DICompositeType VTableHolder) {
- T.setContainingType(VTableHolder);
+void DIBuilder::replaceVTableHolder(MDCompositeType* &T, MDCompositeType* VTableHolder) {
+ {
+ TypedTrackingMDRef<MDCompositeType> N(T);
+ N->replaceVTableHolder(MDTypeRef::get(VTableHolder));
+ T = N.get();
+ }
// If this didn't create a self-reference, just return.
if (T != VTableHolder)
@@ -849,9 +838,16 @@ void DIBuilder::replaceVTableHolder(DICompositeType &T, DICompositeType VTableHo
trackIfUnresolved(N);
}
-void DIBuilder::replaceArrays(DICompositeType &T, DIArray Elements,
+void DIBuilder::replaceArrays(MDCompositeType* &T, DIArray Elements,
DIArray TParams) {
- T.setArrays(Elements, TParams);
+ {
+ TypedTrackingMDRef<MDCompositeType> N(T);
+ if (Elements)
+ N->replaceElements(Elements);
+ if (TParams)
+ N->replaceTemplateParams(MDTemplateParameterArray(TParams));
+ T = N.get();
+ }
// If T isn't resolved, there's no problem.
if (!T->isResolved())
@@ -861,7 +857,7 @@ void DIBuilder::replaceArrays(DICompositeType &T, DIArray Elements,
// arrays explicitly if they're unresolved, or else the cycles will be
// orphaned.
if (Elements)
- trackIfUnresolved(Elements);
+ trackIfUnresolved(Elements.get());
if (TParams)
- trackIfUnresolved(TParams);
+ trackIfUnresolved(TParams.get());
}
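
The DIBuilder.cpp hunks above replace the old value-type DI* wrappers in the builder interface with raw pointers to the specialized debug-metadata nodes (MDType *, MDScope *, MDLocalVariable *, and so on), and insertDeclare() / insertDbgValueIntrinsic() now take an explicit MDLocation whose scope must resolve to the same subprogram as the variable being described. A minimal caller-side sketch of the new shape, assuming a DIBuilder DIB, its LLVMContext Ctx, a subprogram SP, a file F, an already-created MDType *IntTy, a Value *Addr (e.g. an alloca), and a BasicBlock *BB — all illustrative names, not part of the patch:

    // Local-variable metadata now comes back as MDLocalVariable* rather than
    // a DIVariable wrapper.
    MDLocalVariable *Var = DIB.createLocalVariable(
        dwarf::DW_TAG_auto_variable, SP, "x", F, /*LineNo=*/7, IntTy,
        /*AlwaysPreserve=*/true, /*Flags=*/0);

    // dbg.declare now needs a debug location; its scope has to live in the
    // same subprogram as Var, or the new asserts in insertDeclare() fire.
    MDLocation *Loc = MDLocation::get(Ctx, /*Line=*/7, /*Column=*/3, SP);
    DIB.insertDeclare(Addr, Var, DIB.createExpression(), Loc, BB);
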
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index 9a6b953..719c28b 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
@@ -25,6 +24,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
@@ -33,612 +33,62 @@
using namespace llvm;
using namespace llvm::dwarf;
-//===----------------------------------------------------------------------===//
-// DIDescriptor
-//===----------------------------------------------------------------------===//
-
-unsigned DIDescriptor::getFlag(StringRef Flag) {
- return StringSwitch<unsigned>(Flag)
-#define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME)
-#include "llvm/IR/DebugInfoFlags.def"
- .Default(0);
-}
-
-const char *DIDescriptor::getFlagString(unsigned Flag) {
- switch (Flag) {
- default:
- return "";
-#define HANDLE_DI_FLAG(ID, NAME) \
- case Flag##NAME: \
- return "DIFlag" #NAME;
-#include "llvm/IR/DebugInfoFlags.def"
- }
-}
-
-unsigned DIDescriptor::splitFlags(unsigned Flags,
- SmallVectorImpl<unsigned> &SplitFlags) {
- // Accessibility flags need to be specially handled, since they're packed
- // together.
- if (unsigned A = Flags & FlagAccessibility) {
- if (A == FlagPrivate)
- SplitFlags.push_back(FlagPrivate);
- else if (A == FlagProtected)
- SplitFlags.push_back(FlagProtected);
- else
- SplitFlags.push_back(FlagPublic);
- Flags &= ~A;
- }
-
-#define HANDLE_DI_FLAG(ID, NAME) \
- if (unsigned Bit = Flags & ID) { \
- SplitFlags.push_back(Bit); \
- Flags &= ~Bit; \
- }
-#include "llvm/IR/DebugInfoFlags.def"
-
- return Flags;
-}
-
-bool DIDescriptor::Verify() const {
- return DbgNode &&
- (DIDerivedType(DbgNode).Verify() ||
- DICompositeType(DbgNode).Verify() || DIBasicType(DbgNode).Verify() ||
- DIVariable(DbgNode).Verify() || DISubprogram(DbgNode).Verify() ||
- DIGlobalVariable(DbgNode).Verify() || DIFile(DbgNode).Verify() ||
- DICompileUnit(DbgNode).Verify() || DINameSpace(DbgNode).Verify() ||
- DILexicalBlock(DbgNode).Verify() ||
- DILexicalBlockFile(DbgNode).Verify() ||
- DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() ||
- DIObjCProperty(DbgNode).Verify() ||
- DITemplateTypeParameter(DbgNode).Verify() ||
- DITemplateValueParameter(DbgNode).Verify() ||
- DIImportedEntity(DbgNode).Verify());
-}
-
-static Metadata *getField(const MDNode *DbgNode, unsigned Elt) {
- if (!DbgNode || Elt >= DbgNode->getNumOperands())
- return nullptr;
- return DbgNode->getOperand(Elt);
-}
-
-static MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) {
- return dyn_cast_or_null<MDNode>(getField(DbgNode, Elt));
-}
-
-static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) {
- if (MDString *MDS = dyn_cast_or_null<MDString>(getField(DbgNode, Elt)))
- return MDS->getString();
- return StringRef();
-}
-
-StringRef DIDescriptor::getStringField(unsigned Elt) const {
- return ::getStringField(DbgNode, Elt);
-}
-
-uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
- if (auto *C = getConstantField(Elt))
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
- return CI->getZExtValue();
-
- return 0;
-}
-
-int64_t DIDescriptor::getInt64Field(unsigned Elt) const {
- if (auto *C = getConstantField(Elt))
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
- return CI->getZExtValue();
-
- return 0;
-}
-
-DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
- MDNode *Field = getNodeField(DbgNode, Elt);
- return DIDescriptor(Field);
-}
-
-GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
- return dyn_cast_or_null<GlobalVariable>(getConstantField(Elt));
-}
-
-Constant *DIDescriptor::getConstantField(unsigned Elt) const {
- if (!DbgNode)
- return nullptr;
-
- if (Elt < DbgNode->getNumOperands())
- if (auto *C =
- dyn_cast_or_null<ConstantAsMetadata>(DbgNode->getOperand(Elt)))
- return C->getValue();
- return nullptr;
-}
-
-Function *DIDescriptor::getFunctionField(unsigned Elt) const {
- return dyn_cast_or_null<Function>(getConstantField(Elt));
-}
-
-/// \brief Return the size reported by the variable's type.
-unsigned DIVariable::getSizeInBits(const DITypeIdentifierMap &Map) {
- DIType Ty = getType().resolve(Map);
- // Follow derived types until we reach a type that
- // reports back a size.
- while (Ty.isDerivedType() && !Ty.getSizeInBits()) {
- DIDerivedType DT(&*Ty);
- Ty = DT.getTypeDerivedFrom().resolve(Map);
- }
- assert(Ty.getSizeInBits() && "type with size 0");
- return Ty.getSizeInBits();
-}
-
-bool DIExpression::isBitPiece() const {
- unsigned N = getNumElements();
- return N >=3 && getElement(N-3) == dwarf::DW_OP_bit_piece;
-}
-
-uint64_t DIExpression::getBitPieceOffset() const {
- assert(isBitPiece() && "not a piece");
- return getElement(getNumElements()-2);
-}
-
-uint64_t DIExpression::getBitPieceSize() const {
- assert(isBitPiece() && "not a piece");
- return getElement(getNumElements()-1);
-}
-
-DIExpression::iterator DIExpression::Operand::getNext() const {
- iterator it(I);
- return ++it;
-}
-
-//===----------------------------------------------------------------------===//
-// Simple Descriptor Constructors and other Methods
-//===----------------------------------------------------------------------===//
-
-void DIDescriptor::replaceAllUsesWith(LLVMContext &, DIDescriptor D) {
- assert(DbgNode && "Trying to replace an unverified type!");
- assert(DbgNode->isTemporary() && "Expected temporary node");
- TempMDNode Temp(get());
-
- // Since we use a TrackingVH for the node, its easy for clients to manufacture
- // legitimate situations where they want to replaceAllUsesWith() on something
- // which, due to uniquing, has merged with the source. We shield clients from
- // this detail by allowing a value to be replaced with replaceAllUsesWith()
- // itself.
- if (Temp.get() == D.get()) {
- DbgNode = MDNode::replaceWithUniqued(std::move(Temp));
- return;
- }
-
- Temp->replaceAllUsesWith(D.get());
- DbgNode = D.get();
-}
-
-void DIDescriptor::replaceAllUsesWith(MDNode *D) {
- assert(DbgNode && "Trying to replace an unverified type!");
- assert(DbgNode != D && "This replacement should always happen");
- assert(DbgNode->isTemporary() && "Expected temporary node");
- TempMDNode Node(get());
- Node->replaceAllUsesWith(D);
-}
-
-bool DICompileUnit::Verify() const {
- if (!isCompileUnit())
- return false;
-
- // Don't bother verifying the compilation directory or producer string
- // as those could be empty.
- return !getFilename().empty();
-}
-
-bool DIObjCProperty::Verify() const { return isObjCProperty(); }
-
-/// \brief Check if a value can be a reference to a type.
-static bool isTypeRef(const Metadata *MD) {
- if (!MD)
- return true;
- if (auto *S = dyn_cast<MDString>(MD))
- return !S->getString().empty();
- return isa<MDType>(MD);
-}
-
-/// \brief Check if a value can be a ScopeRef.
-static bool isScopeRef(const Metadata *MD) {
- if (!MD)
- return true;
- if (auto *S = dyn_cast<MDString>(MD))
- return !S->getString().empty();
- return isa<MDScope>(MD);
-}
-
-#ifndef NDEBUG
-/// \brief Check if a value can be a DescriptorRef.
-static bool isDescriptorRef(const Metadata *MD) {
- if (!MD)
- return true;
- if (auto *S = dyn_cast<MDString>(MD))
- return !S->getString().empty();
- return isa<MDNode>(MD);
-}
-#endif
-
-bool DIType::Verify() const {
- auto *N = dyn_cast_or_null<MDType>(DbgNode);
- if (!N)
- return false;
- if (!isScopeRef(N->getScope()))
- return false;
-
- // DIType is abstract, it should be a BasicType, a DerivedType or
- // a CompositeType.
- if (isBasicType())
- return DIBasicType(DbgNode).Verify();
-
- // FIXME: Sink this into the various subclass verifies.
- if (getFilename().empty()) {
- // Check whether the filename is allowed to be empty.
- uint16_t Tag = getTag();
- if (Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
- Tag != dwarf::DW_TAG_pointer_type &&
- Tag != dwarf::DW_TAG_ptr_to_member_type &&
- Tag != dwarf::DW_TAG_reference_type &&
- Tag != dwarf::DW_TAG_rvalue_reference_type &&
- Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_array_type &&
- Tag != dwarf::DW_TAG_enumeration_type &&
- Tag != dwarf::DW_TAG_subroutine_type &&
- Tag != dwarf::DW_TAG_inheritance && Tag != dwarf::DW_TAG_friend &&
- Tag != dwarf::DW_TAG_structure_type && Tag != dwarf::DW_TAG_member &&
- Tag != dwarf::DW_TAG_typedef)
- return false;
- }
-
- if (isCompositeType())
- return DICompositeType(DbgNode).Verify();
- if (isDerivedType())
- return DIDerivedType(DbgNode).Verify();
- return false;
-}
-
-bool DIBasicType::Verify() const {
- return dyn_cast_or_null<MDBasicType>(DbgNode);
-}
-
-bool DIDerivedType::Verify() const {
- auto *N = dyn_cast_or_null<MDDerivedTypeBase>(DbgNode);
- if (!N)
- return false;
- if (getTag() == dwarf::DW_TAG_ptr_to_member_type) {
- auto *D = dyn_cast<MDDerivedType>(N);
- if (!D)
- return false;
- if (!isTypeRef(D->getExtraData()))
- return false;
- }
- return isTypeRef(N->getBaseType());
-}
-
-bool DICompositeType::Verify() const {
- auto *N = dyn_cast_or_null<MDCompositeTypeBase>(DbgNode);
- return N && isTypeRef(N->getBaseType()) && isTypeRef(N->getVTableHolder()) &&
- !(isLValueReference() && isRValueReference());
-}
-
-bool DISubprogram::Verify() const {
- auto *N = dyn_cast_or_null<MDSubprogram>(DbgNode);
- if (!N)
- return false;
-
- if (!isScopeRef(N->getScope()))
- return false;
-
- if (auto *Op = N->getType())
- if (!isa<MDNode>(Op))
- return false;
-
- if (!isTypeRef(getContainingType()))
- return false;
-
- if (isLValueReference() && isRValueReference())
- return false;
-
- // If a DISubprogram has an llvm::Function*, then scope chains from all
- // instructions within the function should lead to this DISubprogram.
- if (auto *F = getFunction()) {
- for (auto &BB : *F) {
- for (auto &I : BB) {
- DebugLoc DL = I.getDebugLoc();
- if (DL.isUnknown())
- continue;
-
- MDNode *Scope = nullptr;
- MDNode *IA = nullptr;
- // walk the inlined-at scopes
- while ((IA = DL.getInlinedAt()))
- DL = DebugLoc::getFromDILocation(IA);
- DL.getScopeAndInlinedAt(Scope, IA);
- if (!Scope)
- return false;
- assert(!IA);
- while (!DIDescriptor(Scope).isSubprogram()) {
- DILexicalBlockFile D(Scope);
- Scope = D.isLexicalBlockFile()
- ? D.getScope()
- : DebugLoc::getFromDILexicalBlock(Scope).getScope();
- if (!Scope)
- return false;
- }
- if (!DISubprogram(Scope).describes(F))
- return false;
- }
- }
- }
-
- return true;
-}
-
-bool DIGlobalVariable::Verify() const {
- auto *N = dyn_cast_or_null<MDGlobalVariable>(DbgNode);
-
- if (!N)
- return false;
-
- if (N->getDisplayName().empty())
- return false;
-
- if (auto *Op = N->getScope())
- if (!isa<MDNode>(Op))
- return false;
-
- if (auto *Op = N->getStaticDataMemberDeclaration())
- if (!isa<MDNode>(Op))
- return false;
-
- return isTypeRef(N->getType());
-}
-
-bool DIVariable::Verify() const {
- auto *N = dyn_cast_or_null<MDLocalVariable>(DbgNode);
-
- if (!N)
- return false;
-
- if (auto *Op = N->getScope())
- if (!isa<MDNode>(Op))
- return false;
-
- return isTypeRef(N->getType());
-}
-
-bool DILocation::Verify() const {
- return dyn_cast_or_null<MDLocation>(DbgNode);
-}
-bool DINameSpace::Verify() const {
- return dyn_cast_or_null<MDNamespace>(DbgNode);
-}
-bool DIFile::Verify() const { return dyn_cast_or_null<MDFile>(DbgNode); }
-bool DIEnumerator::Verify() const {
- return dyn_cast_or_null<MDEnumerator>(DbgNode);
-}
-bool DISubrange::Verify() const {
- return dyn_cast_or_null<MDSubrange>(DbgNode);
-}
-bool DILexicalBlock::Verify() const {
- return dyn_cast_or_null<MDLexicalBlock>(DbgNode);
-}
-bool DILexicalBlockFile::Verify() const {
- return dyn_cast_or_null<MDLexicalBlockFile>(DbgNode);
-}
-bool DITemplateTypeParameter::Verify() const {
- return dyn_cast_or_null<MDTemplateTypeParameter>(DbgNode);
-}
-bool DITemplateValueParameter::Verify() const {
- return dyn_cast_or_null<MDTemplateValueParameter>(DbgNode);
-}
-bool DIImportedEntity::Verify() const {
- return dyn_cast_or_null<MDImportedEntity>(DbgNode);
-}
-
-void DICompositeType::setArraysHelper(MDNode *Elements, MDNode *TParams) {
- TypedTrackingMDRef<MDCompositeTypeBase> N(get());
- if (Elements)
- N->replaceElements(cast<MDTuple>(Elements));
- if (TParams)
- N->replaceTemplateParams(cast<MDTuple>(TParams));
- DbgNode = N;
-}
-
-DIScopeRef DIScope::getRef() const {
- if (!isCompositeType())
- return DIScopeRef(*this);
- DICompositeType DTy(DbgNode);
- if (!DTy.getIdentifier())
- return DIScopeRef(*this);
- return DIScopeRef(DTy.getIdentifier());
-}
-
-void DICompositeType::setContainingType(DICompositeType ContainingType) {
- TypedTrackingMDRef<MDCompositeTypeBase> N(get());
- N->replaceVTableHolder(ContainingType.getRef());
- DbgNode = N;
-}
-
-bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
- assert(CurFn && "Invalid function");
- if (!getContext().isSubprogram())
- return false;
- // This variable is not inlined function argument if its scope
- // does not describe current function.
- return !DISubprogram(getContext()).describes(CurFn);
-}
-
-Function *DISubprogram::getFunction() const {
- if (auto *N = get())
- if (auto *C = dyn_cast_or_null<ConstantAsMetadata>(N->getFunction()))
- return dyn_cast<Function>(C->getValue());
- return nullptr;
-}
-
-bool DISubprogram::describes(const Function *F) {
- assert(F && "Invalid function");
- if (F == getFunction())
- return true;
- StringRef Name = getLinkageName();
- if (Name.empty())
- Name = getName();
- if (F->getName() == Name)
- return true;
- return false;
-}
-
-GlobalVariable *DIGlobalVariable::getGlobal() const {
- return dyn_cast_or_null<GlobalVariable>(getConstant());
-}
-
-DIScopeRef DIScope::getContext() const {
-
- if (isType())
- return DIType(DbgNode).getContext();
-
- if (isSubprogram())
- return DIScopeRef(DISubprogram(DbgNode).getContext());
-
- if (isLexicalBlock())
- return DIScopeRef(DILexicalBlock(DbgNode).getContext());
-
- if (isLexicalBlockFile())
- return DIScopeRef(DILexicalBlockFile(DbgNode).getContext());
-
- if (isNameSpace())
- return DIScopeRef(DINameSpace(DbgNode).getContext());
-
- assert((isFile() || isCompileUnit()) && "Unhandled type of scope.");
- return DIScopeRef(nullptr);
-}
-
-StringRef DIScope::getName() const {
- if (isType())
- return DIType(DbgNode).getName();
- if (isSubprogram())
- return DISubprogram(DbgNode).getName();
- if (isNameSpace())
- return DINameSpace(DbgNode).getName();
- assert((isLexicalBlock() || isLexicalBlockFile() || isFile() ||
- isCompileUnit()) &&
- "Unhandled type of scope.");
- return StringRef();
-}
-
-StringRef DIScope::getFilename() const {
- if (auto *N = get())
- return ::getStringField(dyn_cast_or_null<MDNode>(N->getFile()), 0);
- return "";
-}
-
-StringRef DIScope::getDirectory() const {
- if (auto *N = get())
- return ::getStringField(dyn_cast_or_null<MDNode>(N->getFile()), 1);
- return "";
-}
-
-void DICompileUnit::replaceSubprograms(DIArray Subprograms) {
- assert(Verify() && "Expected compile unit");
- get()->replaceSubprograms(cast_or_null<MDTuple>(Subprograms.get()));
-}
-
-void DICompileUnit::replaceGlobalVariables(DIArray GlobalVariables) {
- assert(Verify() && "Expected compile unit");
- get()->replaceGlobalVariables(cast_or_null<MDTuple>(GlobalVariables.get()));
-}
-
-DILocation DILocation::copyWithNewScope(LLVMContext &Ctx,
- DILexicalBlockFile NewScope) {
- assert(Verify());
- assert(NewScope && "Expected valid scope");
-
- const auto *Old = cast<MDLocation>(DbgNode);
- return DILocation(MDLocation::get(Ctx, Old->getLine(), Old->getColumn(),
- NewScope, Old->getInlinedAt()));
-}
-
-unsigned DILocation::computeNewDiscriminator(LLVMContext &Ctx) {
- std::pair<const char *, unsigned> Key(getFilename().data(), getLineNumber());
- return ++Ctx.pImpl->DiscriminatorTable[Key];
-}
-
-DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
- LLVMContext &VMContext) {
- assert(DIVariable(DV).Verify() && "Expected a DIVariable");
- return cast<MDLocalVariable>(DV)
- ->withInline(cast_or_null<MDLocation>(InlinedScope));
-}
-
-DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
- assert(DIVariable(DV).Verify() && "Expected a DIVariable");
- return cast<MDLocalVariable>(DV)->withoutInline();
-}
-
DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
- DIDescriptor D(Scope);
- if (D.isSubprogram())
- return DISubprogram(Scope);
-
- if (D.isLexicalBlockFile())
- return getDISubprogram(DILexicalBlockFile(Scope).getContext());
-
- if (D.isLexicalBlock())
- return getDISubprogram(DILexicalBlock(Scope).getContext());
-
- return DISubprogram();
+ if (auto *LocalScope = dyn_cast_or_null<MDLocalScope>(Scope))
+ return LocalScope->getSubprogram();
+ return nullptr;
}
DISubprogram llvm::getDISubprogram(const Function *F) {
// We look for the first instr that has a debug annotation leading back to F.
for (auto &BB : *F) {
auto Inst = std::find_if(BB.begin(), BB.end(), [](const Instruction &Inst) {
- return !Inst.getDebugLoc().isUnknown();
+ return Inst.getDebugLoc();
});
if (Inst == BB.end())
continue;
DebugLoc DLoc = Inst->getDebugLoc();
- const MDNode *Scope = DLoc.getScopeNode();
+ const MDNode *Scope = DLoc.getInlinedAtScope();
DISubprogram Subprogram = getDISubprogram(Scope);
- return Subprogram.describes(F) ? Subprogram : DISubprogram();
+ return Subprogram->describes(F) ? Subprogram : DISubprogram();
}
return DISubprogram();
}
DICompositeType llvm::getDICompositeType(DIType T) {
- if (T.isCompositeType())
- return DICompositeType(T);
+ if (auto *C = dyn_cast_or_null<MDCompositeTypeBase>(T))
+ return C;
- if (T.isDerivedType()) {
+ if (auto *D = dyn_cast_or_null<MDDerivedTypeBase>(T)) {
// This function is currently used by dragonegg and dragonegg does
// not generate identifier for types, so using an empty map to resolve
// DerivedFrom should be fine.
DITypeIdentifierMap EmptyMap;
- return getDICompositeType(
- DIDerivedType(T).getTypeDerivedFrom().resolve(EmptyMap));
+ return getDICompositeType(D->getBaseType().resolve(EmptyMap));
}
- return DICompositeType();
+ return nullptr;
}
DITypeIdentifierMap
llvm::generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes) {
DITypeIdentifierMap Map;
for (unsigned CUi = 0, CUe = CU_Nodes->getNumOperands(); CUi != CUe; ++CUi) {
- DICompileUnit CU(CU_Nodes->getOperand(CUi));
- DIArray Retain = CU.getRetainedTypes();
- for (unsigned Ti = 0, Te = Retain.getNumElements(); Ti != Te; ++Ti) {
- if (!Retain.getElement(Ti).isCompositeType())
+ auto *CU = cast<MDCompileUnit>(CU_Nodes->getOperand(CUi));
+ DIArray Retain = CU->getRetainedTypes();
+ for (unsigned Ti = 0, Te = Retain.size(); Ti != Te; ++Ti) {
+ if (!isa<MDCompositeType>(Retain[Ti]))
continue;
- DICompositeType Ty(Retain.getElement(Ti));
- if (MDString *TypeId = Ty.getIdentifier()) {
+ auto *Ty = cast<MDCompositeType>(Retain[Ti]);
+ if (MDString *TypeId = Ty->getRawIdentifier()) {
// Definition has priority over declaration.
// Try to insert (TypeId, Ty) to Map.
std::pair<DITypeIdentifierMap::iterator, bool> P =
Map.insert(std::make_pair(TypeId, Ty));
// If TypeId already exists in Map and this is a definition, replace
// whatever we had (declaration or definition) with the definition.
- if (!P.second && !Ty.isForwardDecl())
+ if (!P.second && !Ty->isForwardDecl())
P.first->second = Ty;
}
}
@@ -673,37 +123,28 @@ void DebugInfoFinder::processModule(const Module &M) {
InitializeTypeMap(M);
if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit CU(CU_Nodes->getOperand(i));
+ DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i));
addCompileUnit(CU);
- DIArray GVs = CU.getGlobalVariables();
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
- DIGlobalVariable DIG(GVs.getElement(i));
+ for (DIGlobalVariable DIG : CU->getGlobalVariables()) {
if (addGlobalVariable(DIG)) {
- processScope(DIG.getContext());
- processType(DIG.getType().resolve(TypeIdentifierMap));
+ processScope(DIG->getScope());
+ processType(DIG->getType().resolve(TypeIdentifierMap));
}
}
- DIArray SPs = CU.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
- processSubprogram(DISubprogram(SPs.getElement(i)));
- DIArray EnumTypes = CU.getEnumTypes();
- for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
- processType(DIType(EnumTypes.getElement(i)));
- DIArray RetainedTypes = CU.getRetainedTypes();
- for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
- processType(DIType(RetainedTypes.getElement(i)));
- DIArray Imports = CU.getImportedEntities();
- for (unsigned i = 0, e = Imports.getNumElements(); i != e; ++i) {
- DIImportedEntity Import = DIImportedEntity(Imports.getElement(i));
- if (!Import)
- continue;
- DIDescriptor Entity = Import.getEntity().resolve(TypeIdentifierMap);
- if (Entity.isType())
- processType(DIType(Entity));
- else if (Entity.isSubprogram())
- processSubprogram(DISubprogram(Entity));
- else if (Entity.isNameSpace())
- processScope(DINameSpace(Entity).getContext());
+ for (auto *SP : CU->getSubprograms())
+ processSubprogram(SP);
+ for (auto *ET : CU->getEnumTypes())
+ processType(ET);
+ for (auto *RT : CU->getRetainedTypes())
+ processType(RT);
+ for (DIImportedEntity Import : CU->getImportedEntities()) {
+ auto *Entity = Import->getEntity().resolve(TypeIdentifierMap);
+ if (auto *T = dyn_cast<MDType>(Entity))
+ processType(T);
+ else if (auto *SP = dyn_cast<MDSubprogram>(Entity))
+ processSubprogram(SP);
+ else if (auto *NS = dyn_cast<MDNamespace>(Entity))
+ processScope(NS->getScope());
}
}
}
@@ -713,79 +154,66 @@ void DebugInfoFinder::processLocation(const Module &M, DILocation Loc) {
if (!Loc)
return;
InitializeTypeMap(M);
- processScope(Loc.getScope());
- processLocation(M, Loc.getOrigLocation());
+ processScope(Loc->getScope());
+ processLocation(M, Loc->getInlinedAt());
}
void DebugInfoFinder::processType(DIType DT) {
if (!addType(DT))
return;
- processScope(DT.getContext().resolve(TypeIdentifierMap));
- if (DT.isCompositeType()) {
- DICompositeType DCT(DT);
- processType(DCT.getTypeDerivedFrom().resolve(TypeIdentifierMap));
- if (DT.isSubroutineType()) {
- DITypeArray DTA = DISubroutineType(DT).getTypeArray();
- for (unsigned i = 0, e = DTA.getNumElements(); i != e; ++i)
- processType(DTA.getElement(i).resolve(TypeIdentifierMap));
+ processScope(DT->getScope().resolve(TypeIdentifierMap));
+ if (auto *DCT = dyn_cast<MDCompositeTypeBase>(DT)) {
+ processType(DCT->getBaseType().resolve(TypeIdentifierMap));
+ if (auto *ST = dyn_cast<MDSubroutineType>(DCT)) {
+ for (MDTypeRef Ref : ST->getTypeArray())
+ processType(Ref.resolve(TypeIdentifierMap));
return;
}
- DIArray DA = DCT.getElements();
- for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
- DIDescriptor D = DA.getElement(i);
- if (D.isType())
- processType(DIType(D));
- else if (D.isSubprogram())
- processSubprogram(DISubprogram(D));
+ for (Metadata *D : DCT->getElements()) {
+ if (auto *T = dyn_cast<MDType>(D))
+ processType(T);
+ else if (DISubprogram SP = dyn_cast<MDSubprogram>(D))
+ processSubprogram(SP);
}
- } else if (DT.isDerivedType()) {
- DIDerivedType DDT(DT);
- processType(DDT.getTypeDerivedFrom().resolve(TypeIdentifierMap));
+ } else if (auto *DDT = dyn_cast<MDDerivedTypeBase>(DT)) {
+ processType(DDT->getBaseType().resolve(TypeIdentifierMap));
}
}
void DebugInfoFinder::processScope(DIScope Scope) {
- if (Scope.isType()) {
- DIType Ty(Scope);
+ if (!Scope)
+ return;
+ if (DIType Ty = dyn_cast<MDType>(Scope)) {
processType(Ty);
return;
}
- if (Scope.isCompileUnit()) {
- addCompileUnit(DICompileUnit(Scope));
+ if (DICompileUnit CU = dyn_cast<MDCompileUnit>(Scope)) {
+ addCompileUnit(CU);
return;
}
- if (Scope.isSubprogram()) {
- processSubprogram(DISubprogram(Scope));
+ if (DISubprogram SP = dyn_cast<MDSubprogram>(Scope)) {
+ processSubprogram(SP);
return;
}
if (!addScope(Scope))
return;
- if (Scope.isLexicalBlock()) {
- DILexicalBlock LB(Scope);
- processScope(LB.getContext());
- } else if (Scope.isLexicalBlockFile()) {
- DILexicalBlockFile LBF = DILexicalBlockFile(Scope);
- processScope(LBF.getScope());
- } else if (Scope.isNameSpace()) {
- DINameSpace NS(Scope);
- processScope(NS.getContext());
+ if (auto *LB = dyn_cast<MDLexicalBlockBase>(Scope)) {
+ processScope(LB->getScope());
+ } else if (auto *NS = dyn_cast<MDNamespace>(Scope)) {
+ processScope(NS->getScope());
}
}
void DebugInfoFinder::processSubprogram(DISubprogram SP) {
if (!addSubprogram(SP))
return;
- processScope(SP.getContext().resolve(TypeIdentifierMap));
- processType(SP.getType());
- DIArray TParams = SP.getTemplateParams();
- for (unsigned I = 0, E = TParams.getNumElements(); I != E; ++I) {
- DIDescriptor Element = TParams.getElement(I);
- if (Element.isTemplateTypeParameter()) {
- DITemplateTypeParameter TType(Element);
- processType(TType.getType().resolve(TypeIdentifierMap));
- } else if (Element.isTemplateValueParameter()) {
- DITemplateValueParameter TVal(Element);
- processType(TVal.getType().resolve(TypeIdentifierMap));
+ processScope(SP->getScope().resolve(TypeIdentifierMap));
+ processType(SP->getType());
+ for (auto *Element : SP->getTemplateParams()) {
+ if (auto *TType = dyn_cast<MDTemplateTypeParameter>(Element)) {
+ processType(TType->getType().resolve(TypeIdentifierMap));
+ } else if (auto *TVal = dyn_cast<MDTemplateValueParameter>(Element)) {
+ processType(TVal->getType().resolve(TypeIdentifierMap));
}
}
}
@@ -797,14 +225,14 @@ void DebugInfoFinder::processDeclare(const Module &M,
return;
InitializeTypeMap(M);
- DIDescriptor DV(N);
- if (!DV.isVariable())
+ DIVariable DV = dyn_cast<MDLocalVariable>(N);
+ if (!DV)
return;
if (!NodesSeen.insert(DV).second)
return;
- processScope(DIVariable(N).getContext());
- processType(DIVariable(N).getType().resolve(TypeIdentifierMap));
+ processScope(DV->getScope());
+ processType(DV->getType().resolve(TypeIdentifierMap));
}
void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) {
@@ -813,14 +241,14 @@ void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) {
return;
InitializeTypeMap(M);
- DIDescriptor DV(N);
- if (!DV.isVariable())
+ DIVariable DV = dyn_cast<MDLocalVariable>(N);
+ if (!DV)
return;
if (!NodesSeen.insert(DV).second)
return;
- processScope(DIVariable(N).getContext());
- processType(DIVariable(N).getType().resolve(TypeIdentifierMap));
+ processScope(DV->getScope());
+ processType(DV->getType().resolve(TypeIdentifierMap));
}
bool DebugInfoFinder::addType(DIType DT) {
@@ -879,76 +307,17 @@ bool DebugInfoFinder::addScope(DIScope Scope) {
return true;
}
-//===----------------------------------------------------------------------===//
-// DIDescriptor: dump routines for all descriptors.
-//===----------------------------------------------------------------------===//
-
-void DIDescriptor::dump() const {
- print(dbgs());
- dbgs() << '\n';
-}
-
-void DIDescriptor::print(raw_ostream &OS) const {
- if (!get())
- return;
- get()->print(OS);
-}
-
-static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
- const LLVMContext &Ctx) {
- if (!DL.isUnknown()) { // Print source line info.
- DIScope Scope(DL.getScope(Ctx));
- assert(Scope.isScope() && "Scope of a DebugLoc should be a DIScope.");
- // Omit the directory, because it's likely to be long and uninteresting.
- CommentOS << Scope.getFilename();
- CommentOS << ':' << DL.getLine();
- if (DL.getCol() != 0)
- CommentOS << ':' << DL.getCol();
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
- if (!InlinedAtDL.isUnknown()) {
- CommentOS << " @[ ";
- printDebugLoc(InlinedAtDL, CommentOS, Ctx);
- CommentOS << " ]";
- }
- }
-}
-
-void DIVariable::printExtendedName(raw_ostream &OS) const {
- const LLVMContext &Ctx = DbgNode->getContext();
- StringRef Res = getName();
- if (!Res.empty())
- OS << Res << "," << getLineNumber();
- if (MDNode *InlinedAt = getInlinedAt()) {
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
- if (!InlinedAtDL.isUnknown()) {
- OS << " @[";
- printDebugLoc(InlinedAtDL, OS, Ctx);
- OS << "]";
+bool llvm::stripDebugInfo(Function &F) {
+ bool Changed = false;
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (I.getDebugLoc()) {
+ Changed = true;
+ I.setDebugLoc(DebugLoc());
+ }
}
}
-}
-
-template <> DIRef<DIDescriptor>::DIRef(const Metadata *V) : Val(V) {
- assert(isDescriptorRef(V) &&
- "DIDescriptorRef should be a MDString or MDNode");
-}
-template <> DIRef<DIScope>::DIRef(const Metadata *V) : Val(V) {
- assert(isScopeRef(V) && "DIScopeRef should be a MDString or MDNode");
-}
-template <> DIRef<DIType>::DIRef(const Metadata *V) : Val(V) {
- assert(isTypeRef(V) && "DITypeRef should be a MDString or MDNode");
-}
-
-template <>
-DIDescriptorRef DIDescriptor::getFieldAs<DIDescriptorRef>(unsigned Elt) const {
- return DIDescriptorRef(cast_or_null<Metadata>(getField(DbgNode, Elt)));
-}
-template <>
-DIScopeRef DIDescriptor::getFieldAs<DIScopeRef>(unsigned Elt) const {
- return DIScopeRef(cast_or_null<Metadata>(getField(DbgNode, Elt)));
-}
-template <> DITypeRef DIDescriptor::getFieldAs<DITypeRef>(unsigned Elt) const {
- return DITypeRef(cast_or_null<Metadata>(getField(DbgNode, Elt)));
+ return Changed;
}
bool llvm::StripDebugInfo(Module &M) {
@@ -984,16 +353,11 @@ bool llvm::StripDebugInfo(Module &M) {
}
}
- for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
- for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
- ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
- ++BI) {
- if (!BI->getDebugLoc().isUnknown()) {
- Changed = true;
- BI->setDebugLoc(DebugLoc());
- }
- }
+ for (Function &F : M)
+ Changed |= stripDebugInfo(F);
+
+ if (GVMaterializer *Materializer = M.getMaterializer())
+ Materializer->setStripDebugInfo();
return Changed;
}
@@ -1014,11 +378,9 @@ llvm::makeSubprogramMap(const Module &M) {
return R;
for (MDNode *N : CU_Nodes->operands()) {
- DICompileUnit CUNode(N);
- DIArray SPs = CUNode.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- if (Function *F = SP.getFunction())
+ DICompileUnit CUNode = cast<MDCompileUnit>(N);
+ for (DISubprogram SP : CUNode->getSubprograms()) {
+ if (Function *F = SP->getFunction())
R.insert(std::make_pair(F, SP));
}
}
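
The DebugInfo.cpp hunks drop the DIDescriptor Verify()/field-accessor layer outright: getDISubprogram() now resolves through MDLocalScope::getSubprogram() instead of hand-walking lexical blocks, the DebugInfoFinder loops become range-for over cast<>/dyn_cast<> results, and StripDebugInfo(Module &) is rebuilt on a new per-function stripDebugInfo(Function &). A rough caller-side sketch of the new helpers, assuming a Module M with debug info attached and the usual DebugInfo.h / raw_ostream.h includes (names are illustrative):

    for (Function &F : M) {
      // The enclosing subprogram is reached through the scope chain in one call.
      if (DISubprogram SP = getDISubprogram(&F))
        errs() << "clearing debug locations in " << SP->getName() << "\n";
      // New per-function entry point; returns true if any !dbg location
      // attachment was removed.
      (void)stripDebugInfo(F);
    }
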
diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp
index 89ec1bc..f6f2ff2 100644
--- a/lib/IR/DebugInfoMetadata.cpp
+++ b/lib/IR/DebugInfoMetadata.cpp
@@ -14,6 +14,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "LLVMContextImpl.h"
#include "MetadataImpl.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Function.h"
using namespace llvm;
@@ -44,6 +45,7 @@ MDLocation *MDLocation::getImpl(LLVMContext &Context, unsigned Line,
// Fixup column.
adjustColumn(Column);
+ assert(Scope && "Expected scope");
if (Storage == Uniqued) {
if (auto *N =
getUniqued(Context.pImpl->MDLocations,
@@ -64,6 +66,96 @@ MDLocation *MDLocation::getImpl(LLVMContext &Context, unsigned Line,
Storage, Context.pImpl->MDLocations);
}
+unsigned MDLocation::computeNewDiscriminator() const {
+ // FIXME: This seems completely wrong.
+ //
+ // 1. If two modules are generated in the same context, then the second
+ // Module will get different discriminators than it would have if it were
+ // generated in its own context.
+ // 2. If this function is called after round-tripping to bitcode instead of
+ // before, it will give a different (and potentially incorrect!) return.
+ //
+ // The discriminator should instead be calculated from local information
+ // where it's actually needed. This logic should be moved to
+ // AddDiscriminators::runOnFunction(), where it doesn't pollute the
+ // LLVMContext.
+ std::pair<const char *, unsigned> Key(getFilename().data(), getLine());
+ return ++getContext().pImpl->DiscriminatorTable[Key];
+}
+
+unsigned DebugNode::getFlag(StringRef Flag) {
+ return StringSwitch<unsigned>(Flag)
+#define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME)
+#include "llvm/IR/DebugInfoFlags.def"
+ .Default(0);
+}
+
+const char *DebugNode::getFlagString(unsigned Flag) {
+ switch (Flag) {
+ default:
+ return "";
+#define HANDLE_DI_FLAG(ID, NAME) \
+ case Flag##NAME: \
+ return "DIFlag" #NAME;
+#include "llvm/IR/DebugInfoFlags.def"
+ }
+}
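A hedged round-trip example for the two helpers above; DIFlagProtected / FlagProtected is one of the name pairs generated from DebugInfoFlags.def:

    unsigned F = DebugNode::getFlag("DIFlagProtected");  // == DebugNode::FlagProtected
    const char *Name = DebugNode::getFlagString(F);      // == "DIFlagProtected"
    assert(DebugNode::getFlag(Name) == F && "flag <-> string round trip");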
+
+unsigned DebugNode::splitFlags(unsigned Flags,
+ SmallVectorImpl<unsigned> &SplitFlags) {
+ // Accessibility flags need to be specially handled, since they're packed
+ // together.
+ if (unsigned A = Flags & FlagAccessibility) {
+ if (A == FlagPrivate)
+ SplitFlags.push_back(FlagPrivate);
+ else if (A == FlagProtected)
+ SplitFlags.push_back(FlagProtected);
+ else
+ SplitFlags.push_back(FlagPublic);
+ Flags &= ~A;
+ }
+
+#define HANDLE_DI_FLAG(ID, NAME) \
+ if (unsigned Bit = Flags & ID) { \
+ SplitFlags.push_back(Bit); \
+ Flags &= ~Bit; \
+ }
+#include "llvm/IR/DebugInfoFlags.def"
+
+ return Flags;
+}
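A short sketch of splitFlags() in use: the packed accessibility bits come out as a single value, and every other recognized bit is split out individually (FlagProtected and FlagVirtual are example values from DebugInfoFlags.def):

    SmallVector<unsigned, 8> Split;
    unsigned Remaining = DebugNode::splitFlags(
        DebugNode::FlagProtected | DebugNode::FlagVirtual, Split);
    // Split == {FlagProtected, FlagVirtual}; Remaining == 0 for recognized bits.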
+
+MDScopeRef MDScope::getScope() const {
+ if (auto *T = dyn_cast<MDType>(this))
+ return T->getScope();
+
+ if (auto *SP = dyn_cast<MDSubprogram>(this))
+ return SP->getScope();
+
+ if (auto *LB = dyn_cast<MDLexicalBlockBase>(this))
+ return MDScopeRef(LB->getScope());
+
+ if (auto *NS = dyn_cast<MDNamespace>(this))
+ return MDScopeRef(NS->getScope());
+
+ assert((isa<MDFile>(this) || isa<MDCompileUnit>(this)) &&
+ "Unhandled type of scope.");
+ return nullptr;
+}
+
+StringRef MDScope::getName() const {
+ if (auto *T = dyn_cast<MDType>(this))
+ return T->getName();
+ if (auto *SP = dyn_cast<MDSubprogram>(this))
+ return SP->getName();
+ if (auto *NS = dyn_cast<MDNamespace>(this))
+ return NS->getName();
+ assert((isa<MDLexicalBlockBase>(this) || isa<MDFile>(this) ||
+ isa<MDCompileUnit>(this)) &&
+ "Unhandled type of scope.");
+ return "";
+}
+
static StringRef getString(const MDString *S) {
if (S)
return S->getString();
@@ -237,6 +329,12 @@ MDCompileUnit *MDCompileUnit::getImpl(
(SourceLanguage, IsOptimized, RuntimeVersion, EmissionKind), Ops);
}
+MDSubprogram *MDLocalScope::getSubprogram() const {
+ if (auto *Block = dyn_cast<MDLexicalBlockBase>(this))
+ return Block->getScope()->getSubprogram();
+ return const_cast<MDSubprogram *>(cast<MDSubprogram>(this));
+}
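A small sketch connecting getSubprogram() to a location's scope chain; DL is assumed to be a valid DebugLoc and errs() comes from raw_ostream.h:

    if (auto *Scope = dyn_cast_or_null<MDLocalScope>(DL.getScope()))
      if (MDSubprogram *SP = Scope->getSubprogram())
        errs() << "enclosing subprogram: " << SP->getName() << "\n";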
+
MDSubprogram *MDSubprogram::getImpl(
LLVMContext &Context, Metadata *Scope, MDString *Name,
MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
@@ -262,6 +360,21 @@ MDSubprogram *MDSubprogram::getImpl(
Ops);
}
+Function *MDSubprogram::getFunction() const {
+ // FIXME: Should this be looking through bitcasts?
+ return dyn_cast_or_null<Function>(getFunctionConstant());
+}
+
+bool MDSubprogram::describes(const Function *F) const {
+ assert(F && "Invalid function");
+ if (F == getFunction())
+ return true;
+ StringRef Name = getLinkageName();
+ if (Name.empty())
+ Name = getName();
+ return F->getName() == Name;
+}
+
void MDSubprogram::replaceFunction(Function *F) {
replaceFunction(F ? ConstantAsMetadata::get(F)
: static_cast<ConstantAsMetadata *>(nullptr));
@@ -271,6 +384,7 @@ MDLexicalBlock *MDLexicalBlock::getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *File, unsigned Line,
unsigned Column, StorageType Storage,
bool ShouldCreate) {
+ assert(Scope && "Expected scope");
DEFINE_GETIMPL_LOOKUP(MDLexicalBlock, (Scope, File, Line, Column));
Metadata *Ops[] = {File, Scope};
DEFINE_GETIMPL_STORE(MDLexicalBlock, (Line, Column), Ops);
@@ -281,6 +395,7 @@ MDLexicalBlockFile *MDLexicalBlockFile::getImpl(LLVMContext &Context,
unsigned Discriminator,
StorageType Storage,
bool ShouldCreate) {
+ assert(Scope && "Expected scope");
DEFINE_GETIMPL_LOOKUP(MDLexicalBlockFile, (Scope, File, Discriminator));
Metadata *Ops[] = {File, Scope};
DEFINE_GETIMPL_STORE(MDLexicalBlockFile, (Discriminator), Ops);
@@ -335,20 +450,23 @@ MDGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
Ops);
}
-MDLocalVariable *MDLocalVariable::getImpl(
- LLVMContext &Context, unsigned Tag, Metadata *Scope, MDString *Name,
- Metadata *File, unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags,
- Metadata *InlinedAt, StorageType Storage, bool ShouldCreate) {
+MDLocalVariable *MDLocalVariable::getImpl(LLVMContext &Context, unsigned Tag,
+ Metadata *Scope, MDString *Name,
+ Metadata *File, unsigned Line,
+ Metadata *Type, unsigned Arg,
+ unsigned Flags, StorageType Storage,
+ bool ShouldCreate) {
// Truncate Arg to 8 bits.
//
// FIXME: This is gross (and should be changed to an assert or removed), but
// it matches historical behaviour for now.
Arg &= (1u << 8) - 1;
+ assert(Scope && "Expected scope");
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(MDLocalVariable, (Tag, Scope, getString(Name), File,
- Line, Type, Arg, Flags, InlinedAt));
- Metadata *Ops[] = {Scope, Name, File, Type, InlinedAt};
+ Line, Type, Arg, Flags));
+ Metadata *Ops[] = {Scope, Name, File, Type};
DEFINE_GETIMPL_STORE(MDLocalVariable, (Tag, Line, Arg, Flags), Ops);
}
@@ -391,6 +509,24 @@ bool MDExpression::isValid() const {
return true;
}
+bool MDExpression::isBitPiece() const {
+ assert(isValid() && "Expected valid expression");
+ if (unsigned N = getNumElements())
+ if (N >= 3)
+ return getElement(N - 3) == dwarf::DW_OP_bit_piece;
+ return false;
+}
+
+uint64_t MDExpression::getBitPieceOffset() const {
+ assert(isBitPiece() && "Expected bit piece");
+ return getElement(getNumElements() - 2);
+}
+
+uint64_t MDExpression::getBitPieceSize() const {
+ assert(isBitPiece() && "Expected bit piece");
+ return getElement(getNumElements() - 1);
+}
+
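A sketch of the bit-piece accessors above; Expr is assumed to be a valid MDExpression* whose last three elements are DW_OP_bit_piece, offset, size:

    if (Expr->isBitPiece()) {
      uint64_t OffsetInBits = Expr->getBitPieceOffset(); // second-to-last element
      uint64_t SizeInBits   = Expr->getBitPieceSize();   // last element
      (void)OffsetInBits; (void)SizeInBits;
    }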
MDObjCProperty *MDObjCProperty::getImpl(
LLVMContext &Context, MDString *Name, Metadata *File, unsigned Line,
MDString *GetterName, MDString *SetterName, unsigned Attributes,
diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp
index e1bf795..4cf7e9e 100644
--- a/lib/IR/DebugLoc.cpp
+++ b/lib/IR/DebugLoc.cpp
@@ -16,101 +16,87 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// DebugLoc Implementation
//===----------------------------------------------------------------------===//
+DebugLoc::DebugLoc(const MDLocation *L) : Loc(const_cast<MDLocation *>(L)) {}
+DebugLoc::DebugLoc(const MDNode *L) : Loc(const_cast<MDNode *>(L)) {}
-unsigned DebugLoc::getLine() const { return DILocation(Loc).getLineNumber(); }
-unsigned DebugLoc::getCol() const { return DILocation(Loc).getColumnNumber(); }
+MDLocation *DebugLoc::get() const {
+ return cast_or_null<MDLocation>(Loc.get());
+}
-MDNode *DebugLoc::getScope() const { return DILocation(Loc).getScope(); }
+unsigned DebugLoc::getLine() const {
+ assert(get() && "Expected valid DebugLoc");
+ return get()->getLine();
+}
-MDNode *DebugLoc::getInlinedAt() const {
- return DILocation(Loc).getOrigLocation();
+unsigned DebugLoc::getCol() const {
+ assert(get() && "Expected valid DebugLoc");
+ return get()->getColumn();
}
-/// Return both the Scope and the InlinedAt values.
-void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA) const {
- Scope = getScope();
- IA = getInlinedAt();
+MDNode *DebugLoc::getScope() const {
+ assert(get() && "Expected valid DebugLoc");
+ return get()->getScope();
}
-MDNode *DebugLoc::getScopeNode() const {
- if (MDNode *InlinedAt = getInlinedAt())
- return DebugLoc::getFromDILocation(InlinedAt).getScopeNode();
- return getScope();
+MDLocation *DebugLoc::getInlinedAt() const {
+ assert(get() && "Expected valid DebugLoc");
+ return get()->getInlinedAt();
+}
+
+MDNode *DebugLoc::getInlinedAtScope() const {
+ return cast<MDLocation>(Loc)->getInlinedAtScope();
}
DebugLoc DebugLoc::getFnDebugLoc() const {
- const MDNode *Scope = getScopeNode();
- DISubprogram SP = getDISubprogram(Scope);
- if (SP.isSubprogram())
- return DebugLoc::get(SP.getScopeLineNumber(), 0, SP);
+ // FIXME: Add a method on \a MDLocation that does this work.
+ const MDNode *Scope = getInlinedAtScope();
+ if (DISubprogram SP = getDISubprogram(Scope))
+ return DebugLoc::get(SP->getScopeLine(), 0, SP);
return DebugLoc();
}
-DebugLoc DebugLoc::get(unsigned Line, unsigned Col,
- MDNode *Scope, MDNode *InlinedAt) {
+DebugLoc DebugLoc::get(unsigned Line, unsigned Col, const MDNode *Scope,
+ const MDNode *InlinedAt) {
// If no scope is available, this is an unknown location.
if (!Scope)
return DebugLoc();
- return getFromDILocation(
- MDLocation::get(Scope->getContext(), Line, Col, Scope, InlinedAt));
-}
-
-/// getAsMDNode - This method converts the compressed DebugLoc node into a
-/// DILocation-compatible MDNode.
-MDNode *DebugLoc::getAsMDNode() const { return Loc; }
-
-/// getFromDILocation - Translate the DILocation quad into a DebugLoc.
-DebugLoc DebugLoc::getFromDILocation(MDNode *N) {
- DebugLoc Loc;
- Loc.Loc.reset(N);
- return Loc;
-}
-
-/// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc.
-DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) {
- DILexicalBlock LexBlock(N);
- MDNode *Scope = LexBlock.getContext();
- if (!Scope) return DebugLoc();
- return get(LexBlock.getLineNumber(), LexBlock.getColumnNumber(), Scope,
- nullptr);
+ return MDLocation::get(Scope->getContext(), Line, Col,
+ const_cast<MDNode *>(Scope),
+ const_cast<MDNode *>(InlinedAt));
}
void DebugLoc::dump() const {
#ifndef NDEBUG
- if (!isUnknown()) {
- dbgs() << getLine();
- if (getCol() != 0)
- dbgs() << ',' << getCol();
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt());
- if (!InlinedAtDL.isUnknown()) {
- dbgs() << " @ ";
- InlinedAtDL.dump();
- } else
- dbgs() << "\n";
- }
+ if (!Loc)
+ return;
+
+ dbgs() << getLine();
+ if (getCol() != 0)
+ dbgs() << ',' << getCol();
+ if (DebugLoc InlinedAtDL = DebugLoc(getInlinedAt())) {
+ dbgs() << " @ ";
+ InlinedAtDL.dump();
+ } else
+ dbgs() << "\n";
#endif
}
void DebugLoc::print(raw_ostream &OS) const {
- if (!isUnknown()) {
- // Print source line info.
- DIScope Scope(getScope());
- assert((!Scope || Scope.isScope()) &&
- "Scope of a DebugLoc should be null or a DIScope.");
- if (Scope)
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << getLine();
- if (getCol() != 0)
- OS << ':' << getCol();
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt());
- if (!InlinedAtDL.isUnknown()) {
- OS << " @[ ";
- InlinedAtDL.print(OS);
- OS << " ]";
- }
+ if (!Loc)
+ return;
+
+ // Print source line info.
+ auto *Scope = cast<MDScope>(getScope());
+ OS << Scope->getFilename();
+ OS << ':' << getLine();
+ if (getCol() != 0)
+ OS << ':' << getCol();
+
+ if (DebugLoc InlinedAtDL = getInlinedAt()) {
+ OS << " @[ ";
+ InlinedAtDL.print(OS);
+ OS << " ]";
}
}
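Taken together, the reworked DebugLoc surface can be exercised roughly like this; I is an Instruction assumed to be in scope and errs() comes from raw_ostream.h:

    if (DebugLoc DL = I.getDebugLoc()) {   // null-checks the wrapped MDLocation
      unsigned Line = DL.getLine();
      unsigned Col  = DL.getCol();
      DL.print(errs());                    // "file.cpp:line[:col] [@[ inlined-at ]]"
      (void)Line; (void)Col;
    }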
diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp
index 5608589..91635f6 100644
--- a/lib/IR/DiagnosticInfo.cpp
+++ b/lib/IR/DiagnosticInfo.cpp
@@ -129,16 +129,16 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const {
}
bool DiagnosticInfoOptimizationBase::isLocationAvailable() const {
- return !getDebugLoc().isUnknown();
+ return getDebugLoc();
}
void DiagnosticInfoOptimizationBase::getLocation(StringRef *Filename,
unsigned *Line,
unsigned *Column) const {
- DILocation DIL(getDebugLoc().getAsMDNode(getFunction().getContext()));
- *Filename = DIL.getFilename();
- *Line = DIL.getLineNumber();
- *Column = DIL.getColumnNumber();
+ MDLocation *L = getDebugLoc();
+ *Filename = L->getFilename();
+ *Line = L->getLine();
+ *Column = L->getColumn();
}
const std::string DiagnosticInfoOptimizationBase::getLocationStr() const {
@@ -147,7 +147,7 @@ const std::string DiagnosticInfoOptimizationBase::getLocationStr() const {
unsigned Column = 0;
if (isLocationAvailable())
getLocation(&Filename, &Line, &Column);
- return Twine(Filename + ":" + Twine(Line) + ":" + Twine(Column)).str();
+ return (Filename + ":" + Twine(Line) + ":" + Twine(Column)).str();
}
void DiagnosticInfoOptimizationBase::print(DiagnosticPrinter &DP) const {
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index 33e1526..d3a0934 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -215,9 +215,7 @@ LLVMContext &Function::getContext() const {
return getType()->getContext();
}
-FunctionType *Function::getFunctionType() const {
- return cast<FunctionType>(getType()->getElementType());
-}
+FunctionType *Function::getFunctionType() const { return Ty; }
bool Function::isVarArg() const {
return getFunctionType()->isVarArg();
@@ -242,7 +240,8 @@ void Function::eraseFromParent() {
Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name,
Module *ParentModule)
: GlobalObject(PointerType::getUnqual(Ty), Value::FunctionVal, nullptr, 0,
- Linkage, name) {
+ Linkage, name),
+ Ty(Ty) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
"invalid return type");
setIsMaterializable(false);
@@ -349,6 +348,12 @@ void Function::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
setAttributes(PAL);
}
+void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+}
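A one-line usage sketch for the new helper; F is a Function assumed in scope, the byte count is arbitrary, and index 1 addresses the first parameter under the usual attribute-index convention:

    F.addDereferenceableOrNullAttr(1, /*Bytes=*/8);  // first param: null, or >= 8 dereferenceable bytes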
+
// Maintain the GC name for each function in an on-the-side table. This saves
// allocating an additional word in Function for programs which do not use GC
// (i.e., most programs) at the cost of increased overhead for clients which do
diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp
index 7010ceb..6ed5891 100644
--- a/lib/IR/GCOV.cpp
+++ b/lib/IR/GCOV.cpp
@@ -555,7 +555,7 @@ FileInfo::openCoveragePath(StringRef CoveragePath) {
return llvm::make_unique<raw_null_ostream>();
std::error_code EC;
- auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath.str(), EC,
+ auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath, EC,
sys::fs::F_Text);
if (EC) {
errs() << EC.message() << "\n";
diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
index 90303b2..06f54c7 100644
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@@ -23,7 +23,8 @@ using namespace llvm;
/// has array of i8 type filled in with the nul terminated string value
/// specified. If Name is specified, it is the name of the global variable
/// created.
-Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) {
+GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str,
+ const Twine &Name) {
Constant *StrConstant = ConstantDataArray::getString(Context, Str);
Module &M = *BB->getParent()->getParent();
GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(),
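Because CreateGlobalString now hands back the GlobalVariable itself, callers can adjust it without a cast (sketch; Builder is an IRBuilder<> assumed in scope, and setUnnamedAddr is the existing GlobalValue setter):

    GlobalVariable *Msg = Builder.CreateGlobalString("hello", "msg");
    Msg->setUnnamedAddr(true);   // no longer needs a cast from Value*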
diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp
index 91ccfbb..c1ac336 100644
--- a/lib/IR/IRPrintingPasses.cpp
+++ b/lib/IR/IRPrintingPasses.cpp
@@ -21,11 +21,14 @@
using namespace llvm;
PrintModulePass::PrintModulePass() : OS(dbgs()) {}
-PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner)
- : OS(OS), Banner(Banner) {}
+PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner,
+ bool ShouldPreserveUseListOrder)
+ : OS(OS), Banner(Banner),
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {}
PreservedAnalyses PrintModulePass::run(Module &M) {
- OS << Banner << M;
+ OS << Banner;
+ M.print(OS, nullptr, ShouldPreserveUseListOrder);
return PreservedAnalyses::all();
}
@@ -46,8 +49,9 @@ class PrintModulePassWrapper : public ModulePass {
public:
static char ID;
PrintModulePassWrapper() : ModulePass(ID) {}
- PrintModulePassWrapper(raw_ostream &OS, const std::string &Banner)
- : ModulePass(ID), P(OS, Banner) {}
+ PrintModulePassWrapper(raw_ostream &OS, const std::string &Banner,
+ bool ShouldPreserveUseListOrder)
+ : ModulePass(ID), P(OS, Banner, ShouldPreserveUseListOrder) {}
bool runOnModule(Module &M) override {
P.run(M);
@@ -112,8 +116,9 @@ INITIALIZE_PASS(PrintBasicBlockPass, "print-bb", "Print BB to stderr", false,
false)
ModulePass *llvm::createPrintModulePass(llvm::raw_ostream &OS,
- const std::string &Banner) {
- return new PrintModulePassWrapper(OS, Banner);
+ const std::string &Banner,
+ bool ShouldPreserveUseListOrder) {
+ return new PrintModulePassWrapper(OS, Banner, ShouldPreserveUseListOrder);
}
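A sketch of requesting use-list-order preservation through the new parameter; legacy::PassManager, outs(), and the Module M are assumed to be available:

    legacy::PassManager PM;
    PM.add(createPrintModulePass(outs(), "; dump",
                                 /*ShouldPreserveUseListOrder=*/true));
    PM.run(M);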
FunctionPass *llvm::createPrintFunctionPass(llvm::raw_ostream &OS,
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
index b456d9f..aa9e027 100644
--- a/lib/IR/InlineAsm.cpp
+++ b/lib/IR/InlineAsm.cpp
@@ -167,7 +167,9 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
// Note that operand #n has a matching input.
scInfo.MatchingInput = ConstraintsSoFar.size();
} else {
- if (ConstraintsSoFar[N].hasMatchingInput())
+ if (ConstraintsSoFar[N].hasMatchingInput() &&
+ (size_t)ConstraintsSoFar[N].MatchingInput !=
+ ConstraintsSoFar.size())
return true;
// Note that operand #n has a matching input.
ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size();
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index 7d9bd7e..57c143c 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -62,8 +62,8 @@ void Instruction::removeFromParent() {
getParent()->getInstList().remove(this);
}
-void Instruction::eraseFromParent() {
- getParent()->getInstList().erase(this);
+iplist<Instruction>::iterator Instruction::eraseFromParent() {
+ return getParent()->getInstList().erase(this);
}
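With eraseFromParent() returning the next iterator, in-place erase loops become straightforward (sketch; BB is a BasicBlock assumed in scope and isInstructionTriviallyDead comes from Transforms/Utils/Local.h):

    for (auto I = BB.begin(), E = BB.end(); I != E;) {
      if (isInstructionTriviallyDead(&*I))
        I = I->eraseFromParent();   // iterator to the following instruction
      else
        ++I;
    }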
/// insertBefore - Insert an unlinked instruction into a basic block
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index af2aeb9..85b7521 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -352,6 +352,12 @@ void CallInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
setAttributes(PAL);
}
+void CallInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+}
+
bool CallInst::hasFnAttrImpl(Attribute::AttrKind A) const {
if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
return true;
@@ -617,6 +623,12 @@ void InvokeInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
setAttributes(PAL);
}
+void InvokeInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+}
+
LandingPadInst *InvokeInst::getLandingPadInst() const {
return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI());
}
@@ -943,12 +955,10 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
: LoadInst(Ptr, Name, isVolatile, Align, NotAtomic, CrossThread, InsertAE) {
}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope,
- Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
+ SynchronizationScope SynchScope, Instruction *InsertBef)
+ : UnaryInstruction(Ty, Load, Ptr, InsertBef) {
setVolatile(isVolatile);
setAlignment(Align);
setAtomic(Order, SynchScope);
@@ -1258,11 +1268,7 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
/// pointer type.
///
template <typename IndexTy>
-static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
- PointerType *PTy = dyn_cast<PointerType>(Ptr->getScalarType());
- if (!PTy) return nullptr; // Type isn't a pointer type!
- Type *Agg = PTy->getElementType();
-
+static Type *getIndexedTypeInternal(Type *Agg, ArrayRef<IndexTy> IdxList) {
// Handle the special case of the empty set index set, which is always valid.
if (IdxList.empty())
return Agg;
@@ -1283,17 +1289,17 @@ static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
return CurIdx == IdxList.size() ? Agg : nullptr;
}
-Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList) {
- return getIndexedTypeInternal(Ptr, IdxList);
+Type *GetElementPtrInst::getIndexedType(Type *Ty, ArrayRef<Value *> IdxList) {
+ return getIndexedTypeInternal(Ty, IdxList);
}
-Type *GetElementPtrInst::getIndexedType(Type *Ptr,
+Type *GetElementPtrInst::getIndexedType(Type *Ty,
ArrayRef<Constant *> IdxList) {
- return getIndexedTypeInternal(Ptr, IdxList);
+ return getIndexedTypeInternal(Ty, IdxList);
}
-Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) {
- return getIndexedTypeInternal(Ptr, IdxList);
+Type *GetElementPtrInst::getIndexedType(Type *Ty, ArrayRef<uint64_t> IdxList) {
+ return getIndexedTypeInternal(Ty, IdxList);
}
/// hasAllZeroIndices - Return true if all of the indices of this GEP are
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index e380665..c096a83 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -275,15 +275,17 @@ template <> struct MDNodeKeyImpl<GenericDebugNode> : MDNodeOpsKey {
template <> struct MDNodeKeyImpl<MDSubrange> {
int64_t Count;
- int64_t Lo;
+ int64_t LowerBound;
- MDNodeKeyImpl(int64_t Count, int64_t Lo) : Count(Count), Lo(Lo) {}
- MDNodeKeyImpl(const MDSubrange *N) : Count(N->getCount()), Lo(N->getLo()) {}
+ MDNodeKeyImpl(int64_t Count, int64_t LowerBound)
+ : Count(Count), LowerBound(LowerBound) {}
+ MDNodeKeyImpl(const MDSubrange *N)
+ : Count(N->getCount()), LowerBound(N->getLowerBound()) {}
bool isKeyOf(const MDSubrange *RHS) const {
- return Count == RHS->getCount() && Lo == RHS->getLo();
+ return Count == RHS->getCount() && LowerBound == RHS->getLowerBound();
}
- unsigned getHashValue() const { return hash_combine(Count, Lo); }
+ unsigned getHashValue() const { return hash_combine(Count, LowerBound); }
};
template <> struct MDNodeKeyImpl<MDEnumerator> {
@@ -347,20 +349,20 @@ template <> struct MDNodeKeyImpl<MDDerivedType> {
BaseType(BaseType), SizeInBits(SizeInBits), AlignInBits(AlignInBits),
OffsetInBits(OffsetInBits), Flags(Flags), ExtraData(ExtraData) {}
MDNodeKeyImpl(const MDDerivedType *N)
- : Tag(N->getTag()), Name(N->getName()), File(N->getFile()),
- Line(N->getLine()), Scope(N->getScope()), BaseType(N->getBaseType()),
- SizeInBits(N->getSizeInBits()), AlignInBits(N->getAlignInBits()),
- OffsetInBits(N->getOffsetInBits()), Flags(N->getFlags()),
- ExtraData(N->getExtraData()) {}
+ : Tag(N->getTag()), Name(N->getName()), File(N->getRawFile()),
+ Line(N->getLine()), Scope(N->getRawScope()),
+ BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()),
+ AlignInBits(N->getAlignInBits()), OffsetInBits(N->getOffsetInBits()),
+ Flags(N->getFlags()), ExtraData(N->getRawExtraData()) {}
bool isKeyOf(const MDDerivedType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getName() &&
- File == RHS->getFile() && Line == RHS->getLine() &&
- Scope == RHS->getScope() && BaseType == RHS->getBaseType() &&
+ File == RHS->getRawFile() && Line == RHS->getLine() &&
+ Scope == RHS->getRawScope() && BaseType == RHS->getRawBaseType() &&
SizeInBits == RHS->getSizeInBits() &&
AlignInBits == RHS->getAlignInBits() &&
OffsetInBits == RHS->getOffsetInBits() && Flags == RHS->getFlags() &&
- ExtraData == RHS->getExtraData();
+ ExtraData == RHS->getRawExtraData();
}
unsigned getHashValue() const {
return hash_combine(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
@@ -397,26 +399,26 @@ template <> struct MDNodeKeyImpl<MDCompositeType> {
RuntimeLang(RuntimeLang), VTableHolder(VTableHolder),
TemplateParams(TemplateParams), Identifier(Identifier) {}
MDNodeKeyImpl(const MDCompositeType *N)
- : Tag(N->getTag()), Name(N->getName()), File(N->getFile()),
- Line(N->getLine()), Scope(N->getScope()), BaseType(N->getBaseType()),
- SizeInBits(N->getSizeInBits()), AlignInBits(N->getAlignInBits()),
- OffsetInBits(N->getOffsetInBits()), Flags(N->getFlags()),
- Elements(N->getElements()), RuntimeLang(N->getRuntimeLang()),
- VTableHolder(N->getVTableHolder()),
- TemplateParams(N->getTemplateParams()), Identifier(N->getIdentifier()) {
- }
+ : Tag(N->getTag()), Name(N->getName()), File(N->getRawFile()),
+ Line(N->getLine()), Scope(N->getRawScope()),
+ BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()),
+ AlignInBits(N->getAlignInBits()), OffsetInBits(N->getOffsetInBits()),
+ Flags(N->getFlags()), Elements(N->getRawElements()),
+ RuntimeLang(N->getRuntimeLang()), VTableHolder(N->getRawVTableHolder()),
+ TemplateParams(N->getRawTemplateParams()),
+ Identifier(N->getIdentifier()) {}
bool isKeyOf(const MDCompositeType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getName() &&
- File == RHS->getFile() && Line == RHS->getLine() &&
- Scope == RHS->getScope() && BaseType == RHS->getBaseType() &&
+ File == RHS->getRawFile() && Line == RHS->getLine() &&
+ Scope == RHS->getRawScope() && BaseType == RHS->getRawBaseType() &&
SizeInBits == RHS->getSizeInBits() &&
AlignInBits == RHS->getAlignInBits() &&
OffsetInBits == RHS->getOffsetInBits() && Flags == RHS->getFlags() &&
- Elements == RHS->getElements() &&
+ Elements == RHS->getRawElements() &&
RuntimeLang == RHS->getRuntimeLang() &&
- VTableHolder == RHS->getVTableHolder() &&
- TemplateParams == RHS->getTemplateParams() &&
+ VTableHolder == RHS->getRawVTableHolder() &&
+ TemplateParams == RHS->getRawTemplateParams() &&
Identifier == RHS->getIdentifier();
}
unsigned getHashValue() const {
@@ -433,10 +435,10 @@ template <> struct MDNodeKeyImpl<MDSubroutineType> {
MDNodeKeyImpl(int64_t Flags, Metadata *TypeArray)
: Flags(Flags), TypeArray(TypeArray) {}
MDNodeKeyImpl(const MDSubroutineType *N)
- : Flags(N->getFlags()), TypeArray(N->getTypeArray()) {}
+ : Flags(N->getFlags()), TypeArray(N->getRawTypeArray()) {}
bool isKeyOf(const MDSubroutineType *RHS) const {
- return Flags == RHS->getFlags() && TypeArray == RHS->getTypeArray();
+ return Flags == RHS->getFlags() && TypeArray == RHS->getRawTypeArray();
}
unsigned getHashValue() const { return hash_combine(Flags, TypeArray); }
};
@@ -484,27 +486,28 @@ template <> struct MDNodeKeyImpl<MDCompileUnit> {
Subprograms(Subprograms), GlobalVariables(GlobalVariables),
ImportedEntities(ImportedEntities) {}
MDNodeKeyImpl(const MDCompileUnit *N)
- : SourceLanguage(N->getSourceLanguage()), File(N->getFile()),
+ : SourceLanguage(N->getSourceLanguage()), File(N->getRawFile()),
Producer(N->getProducer()), IsOptimized(N->isOptimized()),
Flags(N->getFlags()), RuntimeVersion(N->getRuntimeVersion()),
SplitDebugFilename(N->getSplitDebugFilename()),
- EmissionKind(N->getEmissionKind()), EnumTypes(N->getEnumTypes()),
- RetainedTypes(N->getRetainedTypes()), Subprograms(N->getSubprograms()),
- GlobalVariables(N->getGlobalVariables()),
- ImportedEntities(N->getImportedEntities()) {}
+ EmissionKind(N->getEmissionKind()), EnumTypes(N->getRawEnumTypes()),
+ RetainedTypes(N->getRawRetainedTypes()),
+ Subprograms(N->getRawSubprograms()),
+ GlobalVariables(N->getRawGlobalVariables()),
+ ImportedEntities(N->getRawImportedEntities()) {}
bool isKeyOf(const MDCompileUnit *RHS) const {
return SourceLanguage == RHS->getSourceLanguage() &&
- File == RHS->getFile() && Producer == RHS->getProducer() &&
+ File == RHS->getRawFile() && Producer == RHS->getProducer() &&
IsOptimized == RHS->isOptimized() && Flags == RHS->getFlags() &&
RuntimeVersion == RHS->getRuntimeVersion() &&
SplitDebugFilename == RHS->getSplitDebugFilename() &&
EmissionKind == RHS->getEmissionKind() &&
- EnumTypes == RHS->getEnumTypes() &&
- RetainedTypes == RHS->getRetainedTypes() &&
- Subprograms == RHS->getSubprograms() &&
- GlobalVariables == RHS->getGlobalVariables() &&
- ImportedEntities == RHS->getImportedEntities();
+ EnumTypes == RHS->getRawEnumTypes() &&
+ RetainedTypes == RHS->getRawRetainedTypes() &&
+ Subprograms == RHS->getRawSubprograms() &&
+ GlobalVariables == RHS->getRawGlobalVariables() &&
+ ImportedEntities == RHS->getRawImportedEntities();
}
unsigned getHashValue() const {
return hash_combine(SourceLanguage, File, Producer, IsOptimized, Flags,
@@ -549,31 +552,32 @@ template <> struct MDNodeKeyImpl<MDSubprogram> {
Function(Function), TemplateParams(TemplateParams),
Declaration(Declaration), Variables(Variables) {}
MDNodeKeyImpl(const MDSubprogram *N)
- : Scope(N->getScope()), Name(N->getName()),
- LinkageName(N->getLinkageName()), File(N->getFile()),
- Line(N->getLine()), Type(N->getType()),
+ : Scope(N->getRawScope()), Name(N->getName()),
+ LinkageName(N->getLinkageName()), File(N->getRawFile()),
+ Line(N->getLine()), Type(N->getRawType()),
IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()),
- ScopeLine(N->getScopeLine()), ContainingType(N->getContainingType()),
+ ScopeLine(N->getScopeLine()), ContainingType(N->getRawContainingType()),
Virtuality(N->getVirtuality()), VirtualIndex(N->getVirtualIndex()),
Flags(N->getFlags()), IsOptimized(N->isOptimized()),
- Function(N->getFunction()), TemplateParams(N->getTemplateParams()),
- Declaration(N->getDeclaration()), Variables(N->getVariables()) {}
+ Function(N->getRawFunction()),
+ TemplateParams(N->getRawTemplateParams()),
+ Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()) {}
bool isKeyOf(const MDSubprogram *RHS) const {
- return Scope == RHS->getScope() && Name == RHS->getName() &&
- LinkageName == RHS->getLinkageName() && File == RHS->getFile() &&
- Line == RHS->getLine() && Type == RHS->getType() &&
+ return Scope == RHS->getRawScope() && Name == RHS->getName() &&
+ LinkageName == RHS->getLinkageName() && File == RHS->getRawFile() &&
+ Line == RHS->getLine() && Type == RHS->getRawType() &&
IsLocalToUnit == RHS->isLocalToUnit() &&
IsDefinition == RHS->isDefinition() &&
ScopeLine == RHS->getScopeLine() &&
- ContainingType == RHS->getContainingType() &&
+ ContainingType == RHS->getRawContainingType() &&
Virtuality == RHS->getVirtuality() &&
VirtualIndex == RHS->getVirtualIndex() && Flags == RHS->getFlags() &&
IsOptimized == RHS->isOptimized() &&
- Function == RHS->getFunction() &&
- TemplateParams == RHS->getTemplateParams() &&
- Declaration == RHS->getDeclaration() &&
- Variables == RHS->getVariables();
+ Function == RHS->getRawFunction() &&
+ TemplateParams == RHS->getRawTemplateParams() &&
+ Declaration == RHS->getRawDeclaration() &&
+ Variables == RHS->getRawVariables();
}
unsigned getHashValue() const {
return hash_combine(Scope, Name, LinkageName, File, Line, Type,
@@ -592,11 +596,11 @@ template <> struct MDNodeKeyImpl<MDLexicalBlock> {
MDNodeKeyImpl(Metadata *Scope, Metadata *File, unsigned Line, unsigned Column)
: Scope(Scope), File(File), Line(Line), Column(Column) {}
MDNodeKeyImpl(const MDLexicalBlock *N)
- : Scope(N->getScope()), File(N->getFile()), Line(N->getLine()),
+ : Scope(N->getRawScope()), File(N->getRawFile()), Line(N->getLine()),
Column(N->getColumn()) {}
bool isKeyOf(const MDLexicalBlock *RHS) const {
- return Scope == RHS->getScope() && File == RHS->getFile() &&
+ return Scope == RHS->getRawScope() && File == RHS->getRawFile() &&
Line == RHS->getLine() && Column == RHS->getColumn();
}
unsigned getHashValue() const {
@@ -612,11 +616,11 @@ template <> struct MDNodeKeyImpl<MDLexicalBlockFile> {
MDNodeKeyImpl(Metadata *Scope, Metadata *File, unsigned Discriminator)
: Scope(Scope), File(File), Discriminator(Discriminator) {}
MDNodeKeyImpl(const MDLexicalBlockFile *N)
- : Scope(N->getScope()), File(N->getFile()),
+ : Scope(N->getRawScope()), File(N->getRawFile()),
Discriminator(N->getDiscriminator()) {}
bool isKeyOf(const MDLexicalBlockFile *RHS) const {
- return Scope == RHS->getScope() && File == RHS->getFile() &&
+ return Scope == RHS->getRawScope() && File == RHS->getRawFile() &&
Discriminator == RHS->getDiscriminator();
}
unsigned getHashValue() const {
@@ -633,11 +637,11 @@ template <> struct MDNodeKeyImpl<MDNamespace> {
MDNodeKeyImpl(Metadata *Scope, Metadata *File, StringRef Name, unsigned Line)
: Scope(Scope), File(File), Name(Name), Line(Line) {}
MDNodeKeyImpl(const MDNamespace *N)
- : Scope(N->getScope()), File(N->getFile()), Name(N->getName()),
+ : Scope(N->getRawScope()), File(N->getRawFile()), Name(N->getName()),
Line(N->getLine()) {}
bool isKeyOf(const MDNamespace *RHS) const {
- return Scope == RHS->getScope() && File == RHS->getFile() &&
+ return Scope == RHS->getRawScope() && File == RHS->getRawFile() &&
Name == RHS->getName() && Line == RHS->getLine();
}
unsigned getHashValue() const {
@@ -651,10 +655,10 @@ template <> struct MDNodeKeyImpl<MDTemplateTypeParameter> {
MDNodeKeyImpl(StringRef Name, Metadata *Type) : Name(Name), Type(Type) {}
MDNodeKeyImpl(const MDTemplateTypeParameter *N)
- : Name(N->getName()), Type(N->getType()) {}
+ : Name(N->getName()), Type(N->getRawType()) {}
bool isKeyOf(const MDTemplateTypeParameter *RHS) const {
- return Name == RHS->getName() && Type == RHS->getType();
+ return Name == RHS->getName() && Type == RHS->getRawType();
}
unsigned getHashValue() const { return hash_combine(Name, Type); }
};
@@ -668,12 +672,12 @@ template <> struct MDNodeKeyImpl<MDTemplateValueParameter> {
MDNodeKeyImpl(unsigned Tag, StringRef Name, Metadata *Type, Metadata *Value)
: Tag(Tag), Name(Name), Type(Type), Value(Value) {}
MDNodeKeyImpl(const MDTemplateValueParameter *N)
- : Tag(N->getTag()), Name(N->getName()), Type(N->getType()),
+ : Tag(N->getTag()), Name(N->getName()), Type(N->getRawType()),
Value(N->getValue()) {}
bool isKeyOf(const MDTemplateValueParameter *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getName() &&
- Type == RHS->getType() && Value == RHS->getValue();
+ Type == RHS->getRawType() && Value == RHS->getValue();
}
unsigned getHashValue() const { return hash_combine(Tag, Name, Type, Value); }
};
@@ -699,21 +703,22 @@ template <> struct MDNodeKeyImpl<MDGlobalVariable> {
IsDefinition(IsDefinition), Variable(Variable),
StaticDataMemberDeclaration(StaticDataMemberDeclaration) {}
MDNodeKeyImpl(const MDGlobalVariable *N)
- : Scope(N->getScope()), Name(N->getName()),
- LinkageName(N->getLinkageName()), File(N->getFile()),
- Line(N->getLine()), Type(N->getType()),
+ : Scope(N->getRawScope()), Name(N->getName()),
+ LinkageName(N->getLinkageName()), File(N->getRawFile()),
+ Line(N->getLine()), Type(N->getRawType()),
IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()),
- Variable(N->getVariable()),
- StaticDataMemberDeclaration(N->getStaticDataMemberDeclaration()) {}
+ Variable(N->getRawVariable()),
+ StaticDataMemberDeclaration(N->getRawStaticDataMemberDeclaration()) {}
bool isKeyOf(const MDGlobalVariable *RHS) const {
- return Scope == RHS->getScope() && Name == RHS->getName() &&
- LinkageName == RHS->getLinkageName() && File == RHS->getFile() &&
- Line == RHS->getLine() && Type == RHS->getType() &&
+ return Scope == RHS->getRawScope() && Name == RHS->getName() &&
+ LinkageName == RHS->getLinkageName() && File == RHS->getRawFile() &&
+ Line == RHS->getLine() && Type == RHS->getRawType() &&
IsLocalToUnit == RHS->isLocalToUnit() &&
IsDefinition == RHS->isDefinition() &&
- Variable == RHS->getVariable() &&
- StaticDataMemberDeclaration == RHS->getStaticDataMemberDeclaration();
+ Variable == RHS->getRawVariable() &&
+ StaticDataMemberDeclaration ==
+ RHS->getRawStaticDataMemberDeclaration();
}
unsigned getHashValue() const {
return hash_combine(Scope, Name, LinkageName, File, Line, Type,
@@ -731,28 +736,24 @@ template <> struct MDNodeKeyImpl<MDLocalVariable> {
Metadata *Type;
unsigned Arg;
unsigned Flags;
- Metadata *InlinedAt;
MDNodeKeyImpl(unsigned Tag, Metadata *Scope, StringRef Name, Metadata *File,
- unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags,
- Metadata *InlinedAt)
+ unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags)
: Tag(Tag), Scope(Scope), Name(Name), File(File), Line(Line), Type(Type),
- Arg(Arg), Flags(Flags), InlinedAt(InlinedAt) {}
+ Arg(Arg), Flags(Flags) {}
MDNodeKeyImpl(const MDLocalVariable *N)
- : Tag(N->getTag()), Scope(N->getScope()), Name(N->getName()),
- File(N->getFile()), Line(N->getLine()), Type(N->getType()),
- Arg(N->getArg()), Flags(N->getFlags()), InlinedAt(N->getInlinedAt()) {}
+ : Tag(N->getTag()), Scope(N->getRawScope()), Name(N->getName()),
+ File(N->getRawFile()), Line(N->getLine()), Type(N->getRawType()),
+ Arg(N->getArg()), Flags(N->getFlags()) {}
bool isKeyOf(const MDLocalVariable *RHS) const {
- return Tag == RHS->getTag() && Scope == RHS->getScope() &&
- Name == RHS->getName() && File == RHS->getFile() &&
- Line == RHS->getLine() && Type == RHS->getType() &&
- Arg == RHS->getArg() && Flags == RHS->getFlags() &&
- InlinedAt == RHS->getInlinedAt();
+ return Tag == RHS->getTag() && Scope == RHS->getRawScope() &&
+ Name == RHS->getName() && File == RHS->getRawFile() &&
+ Line == RHS->getLine() && Type == RHS->getRawType() &&
+ Arg == RHS->getArg() && Flags == RHS->getFlags();
}
unsigned getHashValue() const {
- return hash_combine(Tag, Scope, Name, File, Line, Type, Arg, Flags,
- InlinedAt);
+ return hash_combine(Tag, Scope, Name, File, Line, Type, Arg, Flags);
}
};
@@ -785,15 +786,15 @@ template <> struct MDNodeKeyImpl<MDObjCProperty> {
: Name(Name), File(File), Line(Line), GetterName(GetterName),
SetterName(SetterName), Attributes(Attributes), Type(Type) {}
MDNodeKeyImpl(const MDObjCProperty *N)
- : Name(N->getName()), File(N->getFile()), Line(N->getLine()),
+ : Name(N->getName()), File(N->getRawFile()), Line(N->getLine()),
GetterName(N->getGetterName()), SetterName(N->getSetterName()),
- Attributes(N->getAttributes()), Type(N->getType()) {}
+ Attributes(N->getAttributes()), Type(N->getRawType()) {}
bool isKeyOf(const MDObjCProperty *RHS) const {
- return Name == RHS->getName() && File == RHS->getFile() &&
+ return Name == RHS->getName() && File == RHS->getRawFile() &&
Line == RHS->getLine() && GetterName == RHS->getGetterName() &&
SetterName == RHS->getSetterName() &&
- Attributes == RHS->getAttributes() && Type == RHS->getType();
+ Attributes == RHS->getAttributes() && Type == RHS->getRawType();
}
unsigned getHashValue() const {
return hash_combine(Name, File, Line, GetterName, SetterName, Attributes,
@@ -812,12 +813,12 @@ template <> struct MDNodeKeyImpl<MDImportedEntity> {
StringRef Name)
: Tag(Tag), Scope(Scope), Entity(Entity), Line(Line), Name(Name) {}
MDNodeKeyImpl(const MDImportedEntity *N)
- : Tag(N->getTag()), Scope(N->getScope()), Entity(N->getEntity()),
+ : Tag(N->getTag()), Scope(N->getRawScope()), Entity(N->getRawEntity()),
Line(N->getLine()), Name(N->getName()) {}
bool isKeyOf(const MDImportedEntity *RHS) const {
- return Tag == RHS->getTag() && Scope == RHS->getScope() &&
- Entity == RHS->getEntity() && Line == RHS->getLine() &&
+ return Tag == RHS->getTag() && Scope == RHS->getRawScope() &&
+ Entity == RHS->getRawEntity() && Line == RHS->getLine() &&
Name == RHS->getName();
}
unsigned getHashValue() const {
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index 9a365d1..6870032 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -293,7 +293,7 @@ public:
Pass(PT_PassManager, ID), PMDataManager() { }
// Delete on the fly managers.
- virtual ~MPPassManager() {
+ ~MPPassManager() override {
for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
I != E; ++I) {
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 0ad3c5c..93098b9 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -446,6 +446,10 @@ void MDNode::makeUniqued() {
assert(isTemporary() && "Expected this to be temporary");
assert(!isResolved() && "Expected this to be unresolved");
+ // Enable uniquing callbacks.
+ for (auto &Op : mutable_operands())
+ Op.reset(Op.get(), this);
+
// Make this 'uniqued'.
Storage = Uniqued;
if (!countUnresolvedOperands())
@@ -1035,7 +1039,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
// Handle 'dbg' as a special case since it is not stored in the hash table.
if (KindID == LLVMContext::MD_dbg) {
- DbgLoc = DebugLoc::getFromDILocation(Node);
+ DbgLoc = DebugLoc(Node);
return;
}
@@ -1114,7 +1118,7 @@ void Instruction::getAllMetadataImpl(
Result.clear();
// Handle 'dbg' as a special case since it is not stored in the hash table.
- if (!DbgLoc.isUnknown()) {
+ if (DbgLoc) {
Result.push_back(
std::make_pair((unsigned)LLVMContext::MD_dbg, DbgLoc.getAsMDNode()));
if (!hasMetadataHashEntry()) return;
diff --git a/lib/IR/UseListOrder.cpp b/lib/IR/UseListOrder.cpp
deleted file mode 100644
index d064e67..0000000
--- a/lib/IR/UseListOrder.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===- UseListOrder.cpp - Implement Use List Order ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implement structures and command-line options for preserving use-list order.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/IR/UseListOrder.h"
-#include "llvm/Support/CommandLine.h"
-
-using namespace llvm;
-
-static cl::opt<bool> PreserveBitcodeUseListOrder(
- "preserve-bc-use-list-order",
- cl::desc("Experimental support to preserve bitcode use-list order."),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool> PreserveAssemblyUseListOrder(
- "preserve-ll-use-list-order",
- cl::desc("Experimental support to preserve assembly use-list order."),
- cl::init(false), cl::Hidden);
-
-bool llvm::shouldPreserveBitcodeUseListOrder() {
- return PreserveBitcodeUseListOrder;
-}
-
-bool llvm::shouldPreserveAssemblyUseListOrder() {
- return PreserveAssemblyUseListOrder;
-}
-
-void llvm::setPreserveBitcodeUseListOrder(bool ShouldPreserve) {
- PreserveBitcodeUseListOrder = ShouldPreserve;
-}
-
-void llvm::setPreserveAssemblyUseListOrder(bool ShouldPreserve) {
- PreserveAssemblyUseListOrder = ShouldPreserve;
-}
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index 78bfca4..f6eb427 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -525,7 +525,7 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
// Return values from call sites specifically marked as dereferenceable are
// also okay.
- if (ImmutableCallSite CS = V) {
+ if (auto CS = ImmutableCallSite(V)) {
if (uint64_t Bytes = CS.getDereferenceableBytes(0)) {
Type *Ty = V->getType()->getPointerElementType();
if (Ty->isSized() && DL.getTypeStoreSize(Ty) <= Bytes)
@@ -595,7 +595,7 @@ bool Value::isDereferenceablePointer(const DataLayout &DL) const {
APInt DerefBytes(Offset.getBitWidth(), 0);
if (const Argument *A = dyn_cast<Argument>(BV))
DerefBytes = A->getDereferenceableBytes();
- else if (ImmutableCallSite CS = BV)
+ else if (auto CS = ImmutableCallSite(BV))
DerefBytes = CS.getDereferenceableBytes(0);
if (DerefBytes.getBoolValue() && Offset.isNonNegative()) {
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index fcf48c4..fba78e9 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -87,10 +87,9 @@ struct VerifierSupport {
/// \brief Track the brokenness of the module while recursively visiting.
bool Broken;
- bool EverBroken;
explicit VerifierSupport(raw_ostream &OS)
- : OS(OS), M(nullptr), Broken(false), EverBroken(false) {}
+ : OS(OS), M(nullptr), Broken(false) {}
private:
void Write(const Value *V) {
@@ -111,6 +110,10 @@ private:
OS << '\n';
}
+ template <class T> void Write(const MDTupleTypedArrayWrapper<T> &MD) {
+ Write(MD.get());
+ }
+
void Write(const NamedMDNode *NMD) {
if (!NMD)
return;
@@ -145,7 +148,7 @@ public:
/// something is not correct.
void CheckFailed(const Twine &Message) {
OS << Message << '\n';
- EverBroken = Broken = true;
+ Broken = true;
}
/// \brief A check failed (with values to print).
@@ -175,6 +178,9 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// \brief Keep track of the metadata nodes that have been checked already.
SmallPtrSet<const Metadata *, 32> MDNodes;
+ /// \brief Track unresolved string-based type references.
+ SmallDenseMap<const MDString *, const MDNode *, 32> UnresolvedTypeRefs;
+
/// \brief The personality function referenced by the LandingPadInsts.
/// All LandingPadInsts within the same function must use the same
/// personality function.
@@ -268,8 +274,8 @@ public:
visitModuleFlags(M);
visitModuleIdents(M);
- // Verify debug info last.
- verifyDebugInfo();
+ // Verify type references last.
+ verifyTypeRefs();
return !Broken;
}
@@ -296,8 +302,37 @@ private:
void visitBasicBlock(BasicBlock &BB);
void visitRangeMetadata(Instruction& I, MDNode* Range, Type* Ty);
+ template <class Ty> bool isValidMetadataArray(const MDTuple &N);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
#include "llvm/IR/Metadata.def"
+ void visitMDScope(const MDScope &N);
+ void visitMDDerivedTypeBase(const MDDerivedTypeBase &N);
+ void visitMDVariable(const MDVariable &N);
+ void visitMDLexicalBlockBase(const MDLexicalBlockBase &N);
+ void visitMDTemplateParameter(const MDTemplateParameter &N);
+
+ void visitTemplateParams(const MDNode &N, const Metadata &RawParams);
+
+ /// \brief Check for a valid string-based type reference.
+ ///
+ /// Checks if \c MD is a string-based type reference. If it is, keeps track
+ /// of it (and its user, \c N) for error messages later.
+ bool isValidUUID(const MDNode &N, const Metadata *MD);
+
+ /// \brief Check for a valid type reference.
+ ///
+ /// Checks for subclasses of \a MDType, or \a isValidUUID().
+ bool isTypeRef(const MDNode &N, const Metadata *MD);
+
+ /// \brief Check for a valid scope reference.
+ ///
+ /// Checks for subclasses of \a MDScope, or \a isValidUUID().
+ bool isScopeRef(const MDNode &N, const Metadata *MD);
+
+ /// \brief Check for a valid debug info reference.
+ ///
+ /// Checks for subclasses of \a DebugNode, or \a isValidUUID().
+ bool isDIRef(const MDNode &N, const Metadata *MD);
// InstVisitor overrides...
using InstVisitor<Verifier>::visit;
@@ -371,9 +406,11 @@ private:
void verifyFrameRecoverIndices();
// Module-level debug info verification...
- void verifyDebugInfo();
- void processInstructions(DebugInfoFinder &Finder);
- void processCallInst(DebugInfoFinder &Finder, const CallInst &CI);
+ void verifyTypeRefs();
+ template <class MapTy>
+ void verifyBitPieceExpression(const DbgInfoIntrinsic &I,
+ const MapTy &TypeRefs);
+ void visitUnresolvedTypeRef(const MDString *S, const MDNode *N);
};
} // End anonymous namespace
@@ -566,13 +603,14 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) {
MDNode *MD = NMD.getOperand(i);
- if (!MD)
- continue;
if (NMD.getName() == "llvm.dbg.cu") {
- Assert(isa<MDCompileUnit>(MD), "invalid compile unit", &NMD, MD);
+ Assert(MD && isa<MDCompileUnit>(MD), "invalid compile unit", &NMD, MD);
}
+ if (!MD)
+ continue;
+
visitMDNode(*MD);
}
}
@@ -658,6 +696,58 @@ void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) {
visitValueAsMetadata(*V, F);
}
+bool Verifier::isValidUUID(const MDNode &N, const Metadata *MD) {
+ auto *S = dyn_cast<MDString>(MD);
+ if (!S)
+ return false;
+ if (S->getString().empty())
+ return false;
+
+ // Keep track of names of types referenced via UUID so we can check that they
+ // actually exist.
+ UnresolvedTypeRefs.insert(std::make_pair(S, &N));
+ return true;
+}
+
+/// \brief Check if a value can be a reference to a type.
+bool Verifier::isTypeRef(const MDNode &N, const Metadata *MD) {
+ return !MD || isValidUUID(N, MD) || isa<MDType>(MD);
+}
+
+/// \brief Check if a value can be a ScopeRef.
+bool Verifier::isScopeRef(const MDNode &N, const Metadata *MD) {
+ return !MD || isValidUUID(N, MD) || isa<MDScope>(MD);
+}
+
+/// \brief Check if a value can be a debug info ref.
+bool Verifier::isDIRef(const MDNode &N, const Metadata *MD) {
+ return !MD || isValidUUID(N, MD) || isa<DebugNode>(MD);
+}
+
+template <class Ty>
+bool isValidMetadataArrayImpl(const MDTuple &N, bool AllowNull) {
+ for (Metadata *MD : N.operands()) {
+ if (MD) {
+ if (!isa<Ty>(MD))
+ return false;
+ } else {
+ if (!AllowNull)
+ return false;
+ }
+ }
+ return true;
+}
+
+template <class Ty>
+bool isValidMetadataArray(const MDTuple &N) {
+ return isValidMetadataArrayImpl<Ty>(N, /* AllowNull */ false);
+}
+
+template <class Ty>
+bool isValidMetadataNullArray(const MDTuple &N) {
+ return isValidMetadataArrayImpl<Ty>(N, /* AllowNull */ true);
+}
+
void Verifier::visitMDLocation(const MDLocation &N) {
Assert(N.getRawScope() && isa<MDLocalScope>(N.getRawScope()),
"location requires a valid scope", &N, N.getRawScope());
@@ -669,8 +759,14 @@ void Verifier::visitGenericDebugNode(const GenericDebugNode &N) {
Assert(N.getTag(), "invalid tag", &N);
}
+void Verifier::visitMDScope(const MDScope &N) {
+ if (auto *F = N.getRawFile())
+ Assert(isa<MDFile>(F), "invalid file", &N, F);
+}
+
void Verifier::visitMDSubrange(const MDSubrange &N) {
Assert(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N);
+ Assert(N.getCount() >= -1, "invalid subrange count", &N);
}
void Verifier::visitMDEnumerator(const MDEnumerator &N) {
@@ -683,7 +779,39 @@ void Verifier::visitMDBasicType(const MDBasicType &N) {
"invalid tag", &N);
}
+void Verifier::visitMDDerivedTypeBase(const MDDerivedTypeBase &N) {
+ // Common scope checks.
+ visitMDScope(N);
+
+ Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope());
+ Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N,
+ N.getBaseType());
+
+ // FIXME: Sink this into the subclass verifies.
+ if (!N.getFile() || N.getFile()->getFilename().empty()) {
+ // Check whether the filename is allowed to be empty.
+ uint16_t Tag = N.getTag();
+ Assert(
+ Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_pointer_type ||
+ Tag == dwarf::DW_TAG_ptr_to_member_type ||
+ Tag == dwarf::DW_TAG_reference_type ||
+ Tag == dwarf::DW_TAG_rvalue_reference_type ||
+ Tag == dwarf::DW_TAG_restrict_type ||
+ Tag == dwarf::DW_TAG_array_type ||
+ Tag == dwarf::DW_TAG_enumeration_type ||
+ Tag == dwarf::DW_TAG_subroutine_type ||
+ Tag == dwarf::DW_TAG_inheritance || Tag == dwarf::DW_TAG_friend ||
+ Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef,
+ "derived/composite type requires a filename", &N, N.getFile());
+ }
+}
+
void Verifier::visitMDDerivedType(const MDDerivedType &N) {
+ // Common derived type checks.
+ visitMDDerivedTypeBase(N);
+
Assert(N.getTag() == dwarf::DW_TAG_typedef ||
N.getTag() == dwarf::DW_TAG_pointer_type ||
N.getTag() == dwarf::DW_TAG_ptr_to_member_type ||
@@ -696,9 +824,30 @@ void Verifier::visitMDDerivedType(const MDDerivedType &N) {
N.getTag() == dwarf::DW_TAG_inheritance ||
N.getTag() == dwarf::DW_TAG_friend,
"invalid tag", &N);
+ if (N.getTag() == dwarf::DW_TAG_ptr_to_member_type) {
+ Assert(isTypeRef(N, N.getExtraData()), "invalid pointer to member type", &N,
+ N.getExtraData());
+ }
+}
+
+static bool hasConflictingReferenceFlags(unsigned Flags) {
+ return (Flags & DebugNode::FlagLValueReference) &&
+ (Flags & DebugNode::FlagRValueReference);
+}
+
+void Verifier::visitTemplateParams(const MDNode &N, const Metadata &RawParams) {
+ auto *Params = dyn_cast<MDTuple>(&RawParams);
+ Assert(Params, "invalid template params", &N, &RawParams);
+ for (Metadata *Op : Params->operands()) {
+ Assert(Op && isa<MDTemplateParameter>(Op), "invalid template parameter", &N,
+ Params, Op);
+ }
}
void Verifier::visitMDCompositeType(const MDCompositeType &N) {
+ // Common derived type checks.
+ visitMDDerivedTypeBase(N);
+
Assert(N.getTag() == dwarf::DW_TAG_array_type ||
N.getTag() == dwarf::DW_TAG_structure_type ||
N.getTag() == dwarf::DW_TAG_union_type ||
@@ -706,10 +855,29 @@ void Verifier::visitMDCompositeType(const MDCompositeType &N) {
N.getTag() == dwarf::DW_TAG_subroutine_type ||
N.getTag() == dwarf::DW_TAG_class_type,
"invalid tag", &N);
+
+ Assert(!N.getRawElements() || isa<MDTuple>(N.getRawElements()),
+ "invalid composite elements", &N, N.getRawElements());
+ Assert(isTypeRef(N, N.getRawVTableHolder()), "invalid vtable holder", &N,
+ N.getRawVTableHolder());
+ Assert(!N.getRawElements() || isa<MDTuple>(N.getRawElements()),
+ "invalid composite elements", &N, N.getRawElements());
+ Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags",
+ &N);
+ if (auto *Params = N.getRawTemplateParams())
+ visitTemplateParams(N, *Params);
}
void Verifier::visitMDSubroutineType(const MDSubroutineType &N) {
Assert(N.getTag() == dwarf::DW_TAG_subroutine_type, "invalid tag", &N);
+ if (auto *Types = N.getRawTypeArray()) {
+ Assert(isa<MDTuple>(Types), "invalid composite elements", &N, Types);
+ for (Metadata *Ty : N.getTypeArray()->operands()) {
+ Assert(isTypeRef(N, Ty), "invalid subroutine type ref", &N, Types, Ty);
+ }
+ }
+ Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags",
+ &N);
}
void Verifier::visitMDFile(const MDFile &N) {
@@ -718,45 +886,195 @@ void Verifier::visitMDFile(const MDFile &N) {
void Verifier::visitMDCompileUnit(const MDCompileUnit &N) {
Assert(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N);
+
+ // Don't bother verifying the compilation directory or producer string
+ // as those could be empty.
+ Assert(N.getRawFile() && isa<MDFile>(N.getRawFile()),
+ "invalid file", &N, N.getRawFile());
+ Assert(!N.getFile()->getFilename().empty(), "invalid filename", &N,
+ N.getFile());
+
+ if (auto *Array = N.getRawEnumTypes()) {
+ Assert(isa<MDTuple>(Array), "invalid enum list", &N, Array);
+ for (Metadata *Op : N.getEnumTypes()->operands()) {
+ auto *Enum = dyn_cast_or_null<MDCompositeType>(Op);
+ Assert(Enum && Enum->getTag() == dwarf::DW_TAG_enumeration_type,
+ "invalid enum type", &N, N.getEnumTypes(), Op);
+ }
+ }
+ if (auto *Array = N.getRawRetainedTypes()) {
+ Assert(isa<MDTuple>(Array), "invalid retained type list", &N, Array);
+ for (Metadata *Op : N.getRetainedTypes()->operands()) {
+ Assert(Op && isa<MDType>(Op), "invalid retained type", &N, Op);
+ }
+ }
+ if (auto *Array = N.getRawSubprograms()) {
+ Assert(isa<MDTuple>(Array), "invalid subprogram list", &N, Array);
+ for (Metadata *Op : N.getSubprograms()->operands()) {
+ Assert(Op && isa<MDSubprogram>(Op), "invalid subprogram ref", &N, Op);
+ }
+ }
+ if (auto *Array = N.getRawGlobalVariables()) {
+ Assert(isa<MDTuple>(Array), "invalid global variable list", &N, Array);
+ for (Metadata *Op : N.getGlobalVariables()->operands()) {
+ Assert(Op && isa<MDGlobalVariable>(Op), "invalid global variable ref", &N,
+ Op);
+ }
+ }
+ if (auto *Array = N.getRawImportedEntities()) {
+ Assert(isa<MDTuple>(Array), "invalid imported entity list", &N, Array);
+ for (Metadata *Op : N.getImportedEntities()->operands()) {
+ Assert(Op && isa<MDImportedEntity>(Op), "invalid imported entity ref", &N,
+ Op);
+ }
+ }
}
void Verifier::visitMDSubprogram(const MDSubprogram &N) {
Assert(N.getTag() == dwarf::DW_TAG_subprogram, "invalid tag", &N);
+ Assert(isScopeRef(N, N.getRawScope()), "invalid scope", &N, N.getRawScope());
+ if (auto *T = N.getRawType())
+ Assert(isa<MDSubroutineType>(T), "invalid subroutine type", &N, T);
+ Assert(isTypeRef(N, N.getRawContainingType()), "invalid containing type", &N,
+ N.getRawContainingType());
+ if (auto *RawF = N.getRawFunction()) {
+ auto *FMD = dyn_cast<ConstantAsMetadata>(RawF);
+ auto *F = FMD ? FMD->getValue() : nullptr;
+ auto *FT = F ? dyn_cast<PointerType>(F->getType()) : nullptr;
+ Assert(F && FT && isa<FunctionType>(FT->getElementType()),
+ "invalid function", &N, F, FT);
+ }
+ if (auto *Params = N.getRawTemplateParams())
+ visitTemplateParams(N, *Params);
+ if (auto *S = N.getRawDeclaration()) {
+ Assert(isa<MDSubprogram>(S) && !cast<MDSubprogram>(S)->isDefinition(),
+ "invalid subprogram declaration", &N, S);
+ }
+ if (auto *RawVars = N.getRawVariables()) {
+ auto *Vars = dyn_cast<MDTuple>(RawVars);
+ Assert(Vars, "invalid variable list", &N, RawVars);
+ for (Metadata *Op : Vars->operands()) {
+ Assert(Op && isa<MDLocalVariable>(Op), "invalid local variable", &N, Vars,
+ Op);
+ }
+ }
+ Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags",
+ &N);
+
+ auto *F = N.getFunction();
+ if (!F)
+ return;
+
+ // Check that all !dbg attachments lead back to N (or, at least, another
+ // subprogram that describes the same function).
+ //
+ // FIXME: Check this incrementally while visiting !dbg attachments.
+ // FIXME: Only check when N is the canonical subprogram for F.
+ SmallPtrSet<const MDNode *, 32> Seen;
+ for (auto &BB : *F)
+ for (auto &I : BB) {
+ // Be careful about using MDLocation here since we might be dealing with
+ // broken code (this is the Verifier after all).
+ MDLocation *DL =
+ dyn_cast_or_null<MDLocation>(I.getDebugLoc().getAsMDNode());
+ if (!DL)
+ continue;
+ if (!Seen.insert(DL).second)
+ continue;
+
+ MDLocalScope *Scope = DL->getInlinedAtScope();
+ if (Scope && !Seen.insert(Scope).second)
+ continue;
+
+ MDSubprogram *SP = Scope ? Scope->getSubprogram() : nullptr;
+ if (SP && !Seen.insert(SP).second)
+ continue;
+
+ // FIXME: Once N is canonical, check "SP == &N".
+ Assert(SP->describes(F),
+ "!dbg attachment points at wrong subprogram for function", &N, F,
+ &I, DL, Scope, SP);
+ }
}
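
The loop above only checks each location, scope, and subprogram once by remembering what it has already visited, so a large function whose instructions share a handful of scopes stays cheap to verify. A minimal standalone sketch of that visited-set idea, using plain structs in place of the metadata classes (Scope, Loc, and countChains are made up for illustration):

#include <cstdio>
#include <unordered_set>
#include <vector>

// Stand-ins for debug locations and their enclosing scopes.
struct Scope { const Scope *Parent; };
struct Loc   { const Scope *S; };

// Count how many distinct scope chains a list of locations refers to,
// using a visited set so shared chain suffixes are walked only once.
static int countChains(const std::vector<Loc> &Locs) {
  std::unordered_set<const Scope *> Seen;
  int NewChains = 0;
  for (const Loc &L : Locs) {
    const Scope *S = L.S;
    if (!S || !Seen.insert(S).second)
      continue;                     // this chain was already validated
    ++NewChains;
    for (S = S->Parent; S && Seen.insert(S).second; S = S->Parent)
      ;                             // mark the rest of the chain as seen
  }
  return NewChains;
}

int main() {
  Scope Root{nullptr}, Inner{&Root};
  std::vector<Loc> Locs = {{&Inner}, {&Inner}, {&Root}};
  std::printf("%d\n", countChains(Locs)); // prints 1: all three share a chain
  return 0;
}
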
-void Verifier::visitMDLexicalBlock(const MDLexicalBlock &N) {
+void Verifier::visitMDLexicalBlockBase(const MDLexicalBlockBase &N) {
Assert(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N);
+ Assert(N.getRawScope() && isa<MDLocalScope>(N.getRawScope()),
+ "invalid local scope", &N, N.getRawScope());
+}
+
+void Verifier::visitMDLexicalBlock(const MDLexicalBlock &N) {
+ visitMDLexicalBlockBase(N);
+
+ Assert(N.getLine() || !N.getColumn(),
+ "cannot have column info without line info", &N);
}
void Verifier::visitMDLexicalBlockFile(const MDLexicalBlockFile &N) {
- Assert(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N);
+ visitMDLexicalBlockBase(N);
}
void Verifier::visitMDNamespace(const MDNamespace &N) {
Assert(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N);
+ if (auto *S = N.getRawScope())
+ Assert(isa<MDScope>(S), "invalid scope ref", &N, S);
+}
+
+void Verifier::visitMDTemplateParameter(const MDTemplateParameter &N) {
+ Assert(isTypeRef(N, N.getType()), "invalid type ref", &N, N.getType());
}
void Verifier::visitMDTemplateTypeParameter(const MDTemplateTypeParameter &N) {
+ visitMDTemplateParameter(N);
+
Assert(N.getTag() == dwarf::DW_TAG_template_type_parameter, "invalid tag",
&N);
}
void Verifier::visitMDTemplateValueParameter(
const MDTemplateValueParameter &N) {
+ visitMDTemplateParameter(N);
+
Assert(N.getTag() == dwarf::DW_TAG_template_value_parameter ||
N.getTag() == dwarf::DW_TAG_GNU_template_template_param ||
N.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack,
"invalid tag", &N);
}
+void Verifier::visitMDVariable(const MDVariable &N) {
+ if (auto *S = N.getRawScope())
+ Assert(isa<MDScope>(S), "invalid scope", &N, S);
+ Assert(isTypeRef(N, N.getRawType()), "invalid type ref", &N, N.getRawType());
+ if (auto *F = N.getRawFile())
+ Assert(isa<MDFile>(F), "invalid file", &N, F);
+}
+
void Verifier::visitMDGlobalVariable(const MDGlobalVariable &N) {
+ // Checks common to all variables.
+ visitMDVariable(N);
+
Assert(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
+ Assert(!N.getName().empty(), "missing global variable name", &N);
+ if (auto *V = N.getRawVariable()) {
+ Assert(isa<ConstantAsMetadata>(V) &&
+ !isa<Function>(cast<ConstantAsMetadata>(V)->getValue()),
+ "invalid global varaible ref", &N, V);
+ }
+ if (auto *Member = N.getRawStaticDataMemberDeclaration()) {
+ Assert(isa<MDDerivedType>(Member), "invalid static data member declaration",
+ &N, Member);
+ }
}
void Verifier::visitMDLocalVariable(const MDLocalVariable &N) {
+ // Checks common to all variables.
+ visitMDVariable(N);
+
Assert(N.getTag() == dwarf::DW_TAG_auto_variable ||
N.getTag() == dwarf::DW_TAG_arg_variable,
"invalid tag", &N);
+ Assert(N.getRawScope() && isa<MDLocalScope>(N.getRawScope()),
+ "local variable requires a valid scope", &N, N.getRawScope());
}
void Verifier::visitMDExpression(const MDExpression &N) {
@@ -765,12 +1083,20 @@ void Verifier::visitMDExpression(const MDExpression &N) {
void Verifier::visitMDObjCProperty(const MDObjCProperty &N) {
Assert(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N);
+ if (auto *T = N.getRawType())
+ Assert(isa<MDType>(T), "invalid type ref", &N, T);
+ if (auto *F = N.getRawFile())
+ Assert(isa<MDFile>(F), "invalid file", &N, F);
}
void Verifier::visitMDImportedEntity(const MDImportedEntity &N) {
Assert(N.getTag() == dwarf::DW_TAG_imported_module ||
N.getTag() == dwarf::DW_TAG_imported_declaration,
"invalid tag", &N);
+ if (auto *S = N.getRawScope())
+ Assert(isa<MDScope>(S), "invalid scope for imported entity", &N, S);
+ Assert(isDIRef(N, N.getEntity()), "invalid imported entity", &N,
+ N.getEntity());
}
void Verifier::visitComdat(const Comdat &C) {
@@ -2133,7 +2459,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
Type *ElTy =
- GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs);
+ GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs);
Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP);
Assert(GEP.getType()->getScalarType()->isPointerTy() &&
@@ -2214,9 +2540,7 @@ void Verifier::visitRangeMetadata(Instruction& I,
void Verifier::visitLoadInst(LoadInst &LI) {
PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
Assert(PTy, "Load operand must be a pointer.", &LI);
- Type *ElTy = PTy->getElementType();
- Assert(ElTy == LI.getType(),
- "Load result type does not match pointer operand type!", &LI, ElTy);
+ Type *ElTy = LI.getType();
Assert(LI.getAlignment() <= Value::MaximumAlignment,
"huge alignment values are unsupported", &LI);
if (LI.isAtomic()) {
@@ -2885,6 +3209,8 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
Assert(!SawFrameEscape,
"multiple calls to llvm.frameescape in one function", &CI);
for (Value *Arg : CI.arg_operands()) {
+ if (isa<ConstantPointerNull>(Arg))
+ continue; // Null values are allowed as placeholders.
auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
Assert(AI && AI->isStaticAlloca(),
"llvm.frameescape only accepts static allocas", &CI);
@@ -2909,16 +3235,11 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
break;
}
- case Intrinsic::eh_unwindhelp: {
- auto *AI = dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
- Assert(AI && AI->isStaticAlloca(),
- "llvm.eh.unwindhelp requires a static alloca", &CI);
- break;
- }
-
case Intrinsic::experimental_gc_statepoint:
Assert(!CI.isInlineAsm(),
"gc.statepoint support for inline assembly unimplemented", &CI);
+ Assert(CI.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", &CI);
VerifyStatepoint(ImmutableCallSite(&CI));
break;
@@ -2926,6 +3247,8 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
case Intrinsic::experimental_gc_result_float:
case Intrinsic::experimental_gc_result_ptr:
case Intrinsic::experimental_gc_result: {
+ Assert(CI.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", &CI);
// Are we tied to a statepoint properly?
CallSite StatepointCS(CI.getArgOperand(0));
const Function *StatepointFn =
@@ -3035,6 +3358,25 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
};
}
+/// \brief Carefully grab the subprogram from a local scope.
+///
+/// This carefully grabs the subprogram from a local scope, avoiding the
+/// built-in assertions that would typically fire.
+static MDSubprogram *getSubprogram(Metadata *LocalScope) {
+ if (!LocalScope)
+ return nullptr;
+
+ if (auto *SP = dyn_cast<MDSubprogram>(LocalScope))
+ return SP;
+
+ if (auto *LB = dyn_cast<MDLexicalBlockBase>(LocalScope))
+ return getSubprogram(LB->getRawScope());
+
+ // Just return null; broken scope chains are checked elsewhere.
+ assert(!isa<MDLocalScope>(LocalScope) && "Unknown type of local scope");
+ return nullptr;
+}
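+
+A tiny null-tolerant version of that ascent, with a made-up Scope struct standing in for the MD* classes (getSubprogram above just climbs lexical-block scopes until it reaches the enclosing subprogram, returning null for anything broken so the dedicated scope checks can report it):
+
+#include <cassert>
+
+// Hypothetical scope kinds: a function-level scope or a nested block.
+struct Scope {
+  enum Kind { Function, Block } K;
+  Scope *Parent; // only meaningful for blocks; may be null if the chain is broken
+};
+
+// Return the enclosing Function scope, or null if the chain is broken.
+static Scope *getFunctionScope(Scope *S) {
+  if (!S)
+    return nullptr;              // broken chain: let other checks complain
+  if (S->K == Scope::Function)
+    return S;
+  return getFunctionScope(S->Parent);
+}
+
+int main() {
+  Scope Fn{Scope::Function, nullptr};
+  Scope Blk{Scope::Block, &Fn};
+  Scope Orphan{Scope::Block, nullptr};
+  assert(getFunctionScope(&Blk) == &Fn);        // block -> its function
+  assert(getFunctionScope(&Orphan) == nullptr); // broken chain -> null
+  return 0;
+}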
+
template <class DbgIntrinsicTy>
void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) {
auto *MD = cast<MetadataAsValue>(DII.getArgOperand(0))->getMetadata();
@@ -3047,61 +3389,145 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) {
Assert(isa<MDExpression>(DII.getRawExpression()),
"invalid llvm.dbg." + Kind + " intrinsic expression", &DII,
DII.getRawExpression());
-}
-void Verifier::verifyDebugInfo() {
- // Run the debug info verifier only if the regular verifier succeeds, since
- // sometimes checks that have already failed will cause crashes here.
- if (EverBroken || !VerifyDebugInfo)
- return;
+ // Ignore broken !dbg attachments; they're checked elsewhere.
+ if (MDNode *N = DII.getDebugLoc().getAsMDNode())
+ if (!isa<MDLocation>(N))
+ return;
- DebugInfoFinder Finder;
- Finder.processModule(*M);
- processInstructions(Finder);
+ BasicBlock *BB = DII.getParent();
+ Function *F = BB ? BB->getParent() : nullptr;
+
+ // The scopes for variables and !dbg attachments must agree.
+ MDLocalVariable *Var = DII.getVariable();
+ MDLocation *Loc = DII.getDebugLoc();
+ Assert(Loc, "llvm.dbg." + Kind + " intrinsic requires a !dbg attachment",
+ &DII, BB, F);
+
+ MDSubprogram *VarSP = getSubprogram(Var->getRawScope());
+ MDSubprogram *LocSP = getSubprogram(Loc->getRawScope());
+ if (!VarSP || !LocSP)
+ return; // Broken scope chains are checked elsewhere.
+
+ Assert(VarSP == LocSP, "mismatched subprogram between llvm.dbg." + Kind +
+ " variable and !dbg attachment",
+ &DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc,
+ Loc->getScope()->getSubprogram());
+}
+
+template <class MapTy>
+static uint64_t getVariableSize(const MDLocalVariable &V, const MapTy &Map) {
+ // Be careful of broken types (checked elsewhere).
+ const Metadata *RawType = V.getRawType();
+ while (RawType) {
+ // Try to get the size directly.
+ if (auto *T = dyn_cast<MDType>(RawType))
+ if (uint64_t Size = T->getSizeInBits())
+ return Size;
+
+ if (auto *DT = dyn_cast<MDDerivedType>(RawType)) {
+ // Look at the base type.
+ RawType = DT->getRawBaseType();
+ continue;
+ }
- // Verify Debug Info.
- //
- // NOTE: The loud braces are necessary for MSVC compatibility.
- for (DICompileUnit CU : Finder.compile_units()) {
- Assert(CU.Verify(), "DICompileUnit does not Verify!", CU);
- }
- for (DISubprogram S : Finder.subprograms()) {
- Assert(S.Verify(), "DISubprogram does not Verify!", S);
- }
- for (DIGlobalVariable GV : Finder.global_variables()) {
- Assert(GV.Verify(), "DIGlobalVariable does not Verify!", GV);
- }
- for (DIType T : Finder.types()) {
- Assert(T.Verify(), "DIType does not Verify!", T);
+ if (auto *S = dyn_cast<MDString>(RawType)) {
+ // Don't error on missing types (checked elsewhere).
+ RawType = Map.lookup(S);
+ continue;
+ }
+
+ // Missing type or size.
+ break;
}
- for (DIScope S : Finder.scopes()) {
- Assert(S.Verify(), "DIScope does not Verify!", S);
+
+ // Fail gracefully.
+ return 0;
+}
+
+template <class MapTy>
+void Verifier::verifyBitPieceExpression(const DbgInfoIntrinsic &I,
+ const MapTy &TypeRefs) {
+ MDLocalVariable *V;
+ MDExpression *E;
+ if (auto *DVI = dyn_cast<DbgValueInst>(&I)) {
+ V = dyn_cast_or_null<MDLocalVariable>(DVI->getRawVariable());
+ E = dyn_cast_or_null<MDExpression>(DVI->getRawExpression());
+ } else {
+ auto *DDI = cast<DbgDeclareInst>(&I);
+ V = dyn_cast_or_null<MDLocalVariable>(DDI->getRawVariable());
+ E = dyn_cast_or_null<MDExpression>(DDI->getRawExpression());
}
+
+ // We don't know whether this intrinsic verified correctly.
+ if (!V || !E || !E->isValid())
+ return;
+
+ // Nothing to do if this isn't a bit piece expression.
+ if (!E->isBitPiece())
+ return;
+
+ // If there's no size, the type is broken, but that should be checked
+ // elsewhere.
+ uint64_t VarSize = getVariableSize(*V, TypeRefs);
+ if (!VarSize)
+ return;
+
+ unsigned PieceSize = E->getBitPieceSize();
+ unsigned PieceOffset = E->getBitPieceOffset();
+ Assert(PieceSize + PieceOffset <= VarSize,
+ "piece is larger than or outside of variable", &I, V, E);
+ Assert(PieceSize != VarSize, "piece covers entire variable", &I, V, E);
}
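
The two asserts above are plain interval arithmetic: a piece of PieceSize bits at PieceOffset must lie inside the VarSize-bit variable and must not cover all of it. The same rule in isolation, with hypothetical names rather than the verifier's API:

#include <cassert>
#include <cstdint>

// True when a bit piece [Offset, Offset + Size) is a valid, strict
// sub-range of a variable that is VarSize bits wide.
static bool isValidBitPiece(uint64_t Offset, uint64_t Size, uint64_t VarSize) {
  if (Size == 0 || VarSize == 0)
    return false;               // empty pieces and unknown sizes are rejected
  if (Offset + Size > VarSize)
    return false;               // piece runs past the end of the variable
  if (Size == VarSize)
    return false;               // covering the whole variable needs no piece
  return true;
}

int main() {
  assert(isValidBitPiece(0, 32, 64));   // low half of a 64-bit variable
  assert(!isValidBitPiece(48, 32, 64)); // extends past the end
  assert(!isValidBitPiece(0, 64, 64));  // covers the entire variable
  return 0;
}
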
-void Verifier::processInstructions(DebugInfoFinder &Finder) {
- for (const Function &F : *M)
- for (auto I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
- if (MDNode *MD = I->getMetadata(LLVMContext::MD_dbg))
- Finder.processLocation(*M, DILocation(MD));
- if (const CallInst *CI = dyn_cast<CallInst>(&*I))
- processCallInst(Finder, *CI);
- }
+void Verifier::visitUnresolvedTypeRef(const MDString *S, const MDNode *N) {
+ // This is in its own function so we get an error for each bad type ref (not
+ // just the first).
+ Assert(false, "unresolved type ref", S, N);
}
-void Verifier::processCallInst(DebugInfoFinder &Finder, const CallInst &CI) {
- if (Function *F = CI.getCalledFunction())
- if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
- switch (ID) {
- case Intrinsic::dbg_declare:
- Finder.processDeclare(*M, cast<DbgDeclareInst>(&CI));
- break;
- case Intrinsic::dbg_value:
- Finder.processValue(*M, cast<DbgValueInst>(&CI));
- break;
- default:
- break;
- }
+void Verifier::verifyTypeRefs() {
+ auto *CUs = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CUs)
+ return;
+
+ // Visit all the compile units again to map the type references.
+ SmallDenseMap<const MDString *, const MDType *, 32> TypeRefs;
+ for (auto *CU : CUs->operands())
+ if (auto Ts = cast<MDCompileUnit>(CU)->getRetainedTypes())
+ for (MDType *Op : Ts)
+ if (auto *T = dyn_cast<MDCompositeType>(Op))
+ if (auto *S = T->getRawIdentifier()) {
+ UnresolvedTypeRefs.erase(S);
+ TypeRefs.insert(std::make_pair(S, T));
+ }
+
+ // Verify debug info intrinsic bit piece expressions. This needs a second
+ // pass through the instructions, since we haven't built TypeRefs yet when
+ // verifying functions, and simply queuing the DbgInfoIntrinsics to evaluate
+ // later would queue up some that could be deleted in the meantime.
+ for (const Function &F : *M)
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I))
+ verifyBitPieceExpression(*DII, TypeRefs);
+
+ // Return early if all typerefs were resolved.
+ if (UnresolvedTypeRefs.empty())
+ return;
+
+ // Sort the unresolved references by name so the output is deterministic.
+ typedef std::pair<const MDString *, const MDNode *> TypeRef;
+ SmallVector<TypeRef, 32> Unresolved(UnresolvedTypeRefs.begin(),
+ UnresolvedTypeRefs.end());
+ std::sort(Unresolved.begin(), Unresolved.end(),
+ [](const TypeRef &LHS, const TypeRef &RHS) {
+ return LHS.first->getString() < RHS.first->getString();
+ });
+
+ // Visit the unresolved refs (printing out the errors).
+ for (const TypeRef &TR : Unresolved)
+ visitUnresolvedTypeRef(TR.first, TR.second);
}
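
Sorting the leftover references by name before reporting keeps the diagnostics deterministic even though they were collected in a hash map. The same trick in miniature, on made-up identifier strings rather than MDString keys:

#include <algorithm>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

int main() {
  // Pretend these identifiers were referenced but never defined.
  std::unordered_map<std::string, int> Unresolved = {
      {"_ZTS3Foo", 3}, {"_ZTS3Bar", 7}, {"_ZTS3Baz", 1}};

  // Copy into a vector and sort by name so the report does not depend on
  // the hash map's iteration order.
  typedef std::pair<std::string, int> Ref;
  std::vector<Ref> Sorted(Unresolved.begin(), Unresolved.end());
  std::sort(Sorted.begin(), Sorted.end(),
            [](const Ref &L, const Ref &R) { return L.first < R.first; });

  for (const Ref &TR : Sorted)
    std::printf("unresolved type ref: %s\n", TR.first.c_str());
  return 0;
}
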
//===----------------------------------------------------------------------===//
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index a6f980b..b4a7011 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -38,7 +38,6 @@
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Signals.h"
@@ -215,7 +214,8 @@ bool LTOCodeGenerator::writeMergedModules(const char *path,
}
// write bitcode to it
- WriteBitcodeToFile(IRLinker.getModule(), Out.os());
+ WriteBitcodeToFile(IRLinker.getModule(), Out.os(),
+ /* ShouldPreserveUseListOrder */ true);
Out.os().close();
if (Out.os().has_error()) {
@@ -566,24 +566,20 @@ bool LTOCodeGenerator::optimize(bool DisableInline,
return true;
}
-bool LTOCodeGenerator::compileOptimized(raw_ostream &out, std::string &errMsg) {
+bool LTOCodeGenerator::compileOptimized(raw_pwrite_stream &out,
+ std::string &errMsg) {
if (!this->determineTarget(errMsg))
return false;
Module *mergedModule = IRLinker.getModule();
- // Mark which symbols can not be internalized
- this->applyScopeRestrictions();
-
legacy::PassManager codeGenPasses;
- formatted_raw_ostream Out(out);
-
// If the bitcode files contain ARC code and were compiled with optimization,
// the ObjCARCContractPass must be run, so do it unconditionally here.
codeGenPasses.add(createObjCARCContractPass());
- if (TargetMach->addPassesToEmitFile(codeGenPasses, Out,
+ if (TargetMach->addPassesToEmitFile(codeGenPasses, out,
TargetMachine::CGFT_ObjectFile)) {
errMsg = "target file type not supported";
return false;
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index 49aa97d..5cdbca6 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -267,7 +267,7 @@ LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) {
Constant *cn = gvn->getInitializer();
if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) {
if (ca->isCString()) {
- name = ".objc_class_name_" + ca->getAsCString().str();
+ name = (".objc_class_name_" + ca->getAsCString()).str();
return true;
}
}
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 21edc50..03ab9fb 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -1269,15 +1269,11 @@ void ModuleLinker::stripReplacedSubprograms() {
if (!CompileUnits)
return;
for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) {
- DICompileUnit CU(CompileUnits->getOperand(I));
+ DICompileUnit CU = cast<MDCompileUnit>(CompileUnits->getOperand(I));
assert(CU && "Expected valid compile unit");
- DITypedArray<DISubprogram> SPs(CU.getSubprograms());
- assert(SPs && "Expected valid subprogram array");
-
- for (unsigned S = 0, SE = SPs.getNumElements(); S != SE; ++S) {
- DISubprogram SP = SPs.getElement(S);
- if (!SP || !SP.getFunction() || !Functions.count(SP.getFunction()))
+ for (MDSubprogram *SP : CU->getSubprograms()) {
+ if (!SP || !SP->getFunction() || !Functions.count(SP->getFunction()))
continue;
// Prevent DebugInfoFinder from tagging this as the canonical subprogram,
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index c99a3ee..8cb01c4 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -79,17 +79,6 @@ public:
uint8_t other, uint32_t shndx, bool Reserved);
};
-struct ELFRelocationEntry {
- uint64_t Offset; // Where is the relocation.
- const MCSymbol *Symbol; // The symbol to relocate with.
- unsigned Type; // The type of the relocation.
- uint64_t Addend; // The addend to use.
-
- ELFRelocationEntry(uint64_t Offset, const MCSymbol *Symbol, unsigned Type,
- uint64_t Addend)
- : Offset(Offset), Symbol(Symbol), Type(Type), Addend(Addend) {}
-};
-
class ELFObjectWriter : public MCObjectWriter {
FragmentWriter FWriter;
@@ -103,22 +92,13 @@ class ELFObjectWriter : public MCObjectWriter {
static bool isLocal(const MCSymbolData &Data, bool isUsedInReloc);
static bool IsELFMetaDataSection(const MCSectionData &SD);
static uint64_t DataSectionSize(const MCSectionData &SD);
- static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
- const MCSectionData &SD);
static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
const MCSectionData &SD);
- void WriteDataSectionData(MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCSectionELF &Section);
-
- /*static bool isFixupKindX86RIPRel(unsigned Kind) {
- return Kind == X86::reloc_riprel_4byte ||
- Kind == X86::reloc_riprel_4byte_movq_load;
- }*/
+ void writeDataSectionData(MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCSectionData &SD);
- /// ELFSymbolData - Helper struct for containing some precomputed
- /// information on symbols.
+ /// Helper struct for containing some precomputed information on symbols.
struct ELFSymbolData {
MCSymbolData *SymbolData;
uint64_t StringIndex;
@@ -185,7 +165,7 @@ class ELFObjectWriter : public MCObjectWriter {
}
public:
- ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &OS,
+ ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_pwrite_stream &OS,
bool IsLittleEndian)
: MCObjectWriter(OS, IsLittleEndian), FWriter(IsLittleEndian),
TargetObjectWriter(MOTW), NeedsGOT(false) {}
@@ -204,7 +184,7 @@ class ELFObjectWriter : public MCObjectWriter {
MCObjectWriter::reset();
}
- virtual ~ELFObjectWriter();
+ ~ELFObjectWriter() override;
void WriteWord(uint64_t W) {
if (is64Bit())
@@ -218,7 +198,6 @@ class ELFObjectWriter : public MCObjectWriter {
}
void WriteHeader(const MCAssembler &Asm,
- uint64_t SectionDataSize,
unsigned NumberOfSections);
void WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
@@ -245,8 +224,6 @@ class ELFObjectWriter : public MCObjectWriter {
typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
// Map from a signature symbol to the group section
typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
- // Map from a section to the section with the relocations
- typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy;
// Map from a section to its offset
typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy;
@@ -255,23 +232,18 @@ class ELFObjectWriter : public MCObjectWriter {
/// \param Asm - The assembler.
/// \param SectionIndexMap - Maps a section to its index.
/// \param RevGroupMap - Maps a signature symbol to the group section.
- /// \param NumRegularSections - Number of non-relocation sections.
void computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
const SectionIndexMapTy &SectionIndexMap,
- const RevGroupMapTy &RevGroupMap,
- unsigned NumRegularSections);
+ const RevGroupMapTy &RevGroupMap);
- void computeIndexMap(MCAssembler &Asm,
- SectionIndexMapTy &SectionIndexMap,
- RelMapTy &RelMap);
+ void computeIndexMap(MCAssembler &Asm, SectionIndexMapTy &SectionIndexMap);
MCSectionData *createRelocationSection(MCAssembler &Asm,
const MCSectionData &SD);
void CompressDebugSections(MCAssembler &Asm, MCAsmLayout &Layout);
- void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
- const RelMapTy &RelMap);
+ void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout);
void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
SectionIndexMapTy &SectionIndexMap);
@@ -279,23 +251,18 @@ class ELFObjectWriter : public MCObjectWriter {
// Create the sections that show up in the symbol table. Currently
// those are the .note.GNU-stack section and the group sections.
void createIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
- GroupMapTy &GroupMap,
- RevGroupMapTy &RevGroupMap,
- SectionIndexMapTy &SectionIndexMap,
- RelMapTy &RelMap);
+ GroupMapTy &GroupMap, RevGroupMapTy &RevGroupMap,
+ SectionIndexMapTy &SectionIndexMap);
void ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override;
- void writeSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap,
+ void writeSectionHeader(ArrayRef<const MCSectionELF *> Sections,
+ MCAssembler &Asm, const GroupMapTy &GroupMap,
const MCAsmLayout &Layout,
const SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap,
const SectionOffsetMapTy &SectionOffsetMap);
- void ComputeSectionOrder(MCAssembler &Asm,
- std::vector<const MCSectionELF*> &Sections);
-
void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
uint64_t Address, uint64_t Offset,
uint64_t Size, uint32_t Link, uint32_t Info,
@@ -308,6 +275,7 @@ class ELFObjectWriter : public MCObjectWriter {
bool
IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbolData &DataA,
+ const MCSymbolData *DataB,
const MCFragment &FB,
bool InSet,
bool IsPCRel) const override;
@@ -317,7 +285,6 @@ class ELFObjectWriter : public MCObjectWriter {
void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
void writeSection(MCAssembler &Asm,
const SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap,
uint32_t GroupSymbolIndex,
uint64_t Offset, uint64_t Size, uint64_t Alignment,
const MCSectionELF &Section);
@@ -384,8 +351,6 @@ void SymbolTableWriter::writeSymbol(uint32_t name, uint8_t info, uint64_t value,
uint16_t Index = LargeIndex ? uint16_t(ELF::SHN_XINDEX) : shndx;
- raw_svector_ostream OS(SymtabF->getContents());
-
if (Is64Bit) {
write(*SymtabF, name); // st_name
write(*SymtabF, info); // st_info
@@ -438,7 +403,6 @@ ELFObjectWriter::~ELFObjectWriter()
// Emit the ELF header.
void ELFObjectWriter::WriteHeader(const MCAssembler &Asm,
- uint64_t SectionDataSize,
unsigned NumberOfSections) {
// ELF Header
// ----------
@@ -472,8 +436,7 @@ void ELFObjectWriter::WriteHeader(const MCAssembler &Asm,
Write32(ELF::EV_CURRENT); // e_version
WriteWord(0); // e_entry, no entry point in .o file
WriteWord(0); // e_phoff, no program header for .o
- WriteWord(SectionDataSize + (is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
- sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes
+ WriteWord(0); // e_shoff = sec hdr table off in bytes
// e_flags = whatever the target wants
Write32(Asm.getELFHeaderEFlags());
@@ -628,7 +591,7 @@ void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
if (ESize) {
int64_t Res;
- if (!ESize->EvaluateAsAbsolute(Res, Layout))
+ if (!ESize->evaluateKnownAbsolute(Res, Layout))
report_fatal_error("Size expression must be absolute.");
Size = Res;
}
@@ -969,8 +932,7 @@ bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isUsedInReloc) {
}
void ELFObjectWriter::computeIndexMap(MCAssembler &Asm,
- SectionIndexMapTy &SectionIndexMap,
- RelMapTy &RelMap) {
+ SectionIndexMapTy &SectionIndexMap) {
unsigned Index = 1;
for (MCAssembler::iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
@@ -994,17 +956,15 @@ void ELFObjectWriter::computeIndexMap(MCAssembler &Asm,
if (MCSectionData *RelSD = createRelocationSection(Asm, SD)) {
const MCSectionELF *RelSection =
static_cast<const MCSectionELF *>(&RelSD->getSection());
- RelMap[RelSection] = &Section;
SectionIndexMap[RelSection] = Index++;
}
}
}
-void
-ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
- const SectionIndexMapTy &SectionIndexMap,
- const RevGroupMapTy &RevGroupMap,
- unsigned NumRegularSections) {
+void ELFObjectWriter::computeSymbolTable(
+ MCAssembler &Asm, const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap,
+ const RevGroupMapTy &RevGroupMap) {
// FIXME: Is this the correct place to do this?
// FIXME: Why is an undefined reference to _GLOBAL_OFFSET_TABLE_ needed?
if (NeedsGOT) {
@@ -1167,15 +1127,12 @@ ELFObjectWriter::createRelocationSection(MCAssembler &Asm,
EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
unsigned Flags = 0;
- StringRef Group = "";
- if (Section.getFlags() & ELF::SHF_GROUP) {
+ if (Section.getFlags() & ELF::SHF_GROUP)
Flags = ELF::SHF_GROUP;
- Group = Section.getGroup()->getName();
- }
- const MCSectionELF *RelaSection = Ctx.getELFSection(
+ const MCSectionELF *RelaSection = Ctx.createELFRelSection(
RelaSectionName, hasRelocationAddend() ? ELF::SHT_RELA : ELF::SHT_REL,
- Flags, EntrySize, Group, true);
+ Flags, EntrySize, Section.getGroup(), &Section);
return &Asm.getOrCreateSectionData(*RelaSection);
}
@@ -1324,8 +1281,7 @@ void ELFObjectWriter::CompressDebugSections(MCAssembler &Asm,
}
}
-void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
- const RelMapTy &RelMap) {
+void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) {
for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) {
MCSectionData &RelSD = *it;
const MCSectionELF &RelSection =
@@ -1335,7 +1291,7 @@ void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA)
continue;
- const MCSectionELF *Section = RelMap.lookup(&RelSection);
+ const MCSectionELF *Section = RelSection.getAssociatedSection();
MCSectionData &SD = Asm.getOrCreateSectionData(*Section);
RelSD.setAlignment(is64Bit() ? 8 : 4);
@@ -1362,31 +1318,14 @@ void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
WriteWord(EntrySize); // sh_entsize
}
-// ELF doesn't require relocations to be in any order. We sort by the r_offset,
-// just to match gnu as for easier comparison. The use type is an arbitrary way
-// of making the sort deterministic.
-static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) {
- const ELFRelocationEntry &A = *AP;
- const ELFRelocationEntry &B = *BP;
- if (A.Offset != B.Offset)
- return B.Offset - A.Offset;
- if (B.Type != A.Type)
- return A.Type - B.Type;
- //llvm_unreachable("ELFRelocs might be unstable!");
- return 0;
-}
-
-static void sortRelocs(const MCAssembler &Asm,
- std::vector<ELFRelocationEntry> &Relocs) {
- array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel);
-}
-
void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
MCDataFragment *F,
const MCSectionData *SD) {
std::vector<ELFRelocationEntry> &Relocs = Relocations[SD];
- sortRelocs(Asm, Relocs);
+ // Sort the relocation entries. Most targets just sort by Offset, but some
+ // (e.g., MIPS) have additional constraints.
+ TargetObjectWriter->sortRelocs(Asm, Relocs);
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
const ELFRelocationEntry &Entry = Relocs[e - i - 1];
@@ -1473,12 +1412,9 @@ void ELFObjectWriter::CreateMetadataSections(
ShStrTabBuilder.data().end());
}
-void ELFObjectWriter::createIndexedSections(MCAssembler &Asm,
- MCAsmLayout &Layout,
- GroupMapTy &GroupMap,
- RevGroupMapTy &RevGroupMap,
- SectionIndexMapTy &SectionIndexMap,
- RelMapTy &RelMap) {
+void ELFObjectWriter::createIndexedSections(
+ MCAssembler &Asm, MCAsmLayout &Layout, GroupMapTy &GroupMap,
+ RevGroupMapTy &RevGroupMap, SectionIndexMapTy &SectionIndexMap) {
MCContext &Ctx = Asm.getContext();
// Build the groups
@@ -1502,7 +1438,7 @@ void ELFObjectWriter::createIndexedSections(MCAssembler &Asm,
GroupMap[Group] = SignatureSymbol;
}
- computeIndexMap(Asm, SectionIndexMap, RelMap);
+ computeIndexMap(Asm, SectionIndexMap);
// Add sections to the groups
for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
@@ -1522,7 +1458,6 @@ void ELFObjectWriter::createIndexedSections(MCAssembler &Asm,
void ELFObjectWriter::writeSection(MCAssembler &Asm,
const SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap,
uint32_t GroupSymbolIndex,
uint64_t Offset, uint64_t Size,
uint64_t Alignment,
@@ -1531,16 +1466,19 @@ void ELFObjectWriter::writeSection(MCAssembler &Asm,
uint64_t sh_info = 0;
switch(Section.getType()) {
+ default:
+ // Nothing to do.
+ break;
+
case ELF::SHT_DYNAMIC:
sh_link = ShStrTabBuilder.getOffset(Section.getSectionName());
- sh_info = 0;
break;
case ELF::SHT_REL:
case ELF::SHT_RELA: {
sh_link = SymbolTableIndex;
assert(sh_link && ".symtab not found");
- const MCSectionELF *InfoSection = RelMap.find(&Section)->second;
+ const MCSectionELF *InfoSection = Section.getAssociatedSection();
sh_info = SectionIndexMap.lookup(InfoSection);
break;
}
@@ -1555,45 +1493,15 @@ void ELFObjectWriter::writeSection(MCAssembler &Asm,
sh_link = SymbolTableIndex;
break;
- case ELF::SHT_PROGBITS:
- case ELF::SHT_STRTAB:
- case ELF::SHT_NOBITS:
- case ELF::SHT_NOTE:
- case ELF::SHT_NULL:
- case ELF::SHT_ARM_ATTRIBUTES:
- case ELF::SHT_INIT_ARRAY:
- case ELF::SHT_FINI_ARRAY:
- case ELF::SHT_PREINIT_ARRAY:
- case ELF::SHT_X86_64_UNWIND:
- case ELF::SHT_MIPS_REGINFO:
- case ELF::SHT_MIPS_OPTIONS:
- case ELF::SHT_MIPS_ABIFLAGS:
- // Nothing to do.
- break;
-
case ELF::SHT_GROUP:
sh_link = SymbolTableIndex;
sh_info = GroupSymbolIndex;
break;
-
- default:
- llvm_unreachable("FIXME: sh_type value not supported!");
}
if (TargetObjectWriter->getEMachine() == ELF::EM_ARM &&
- Section.getType() == ELF::SHT_ARM_EXIDX) {
- StringRef SecName(Section.getSectionName());
- if (SecName == ".ARM.exidx") {
- sh_link = SectionIndexMap.lookup(Asm.getContext().getELFSection(
- ".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC));
- } else if (SecName.startswith(".ARM.exidx")) {
- StringRef GroupName =
- Section.getGroup() ? Section.getGroup()->getName() : "";
- sh_link = SectionIndexMap.lookup(Asm.getContext().getELFSection(
- SecName.substr(sizeof(".ARM.exidx") - 1), ELF::SHT_PROGBITS,
- ELF::SHF_EXECINSTR | ELF::SHF_ALLOC, 0, GroupName));
- }
- }
+ Section.getType() == ELF::SHT_ARM_EXIDX)
+ sh_link = SectionIndexMap.lookup(Section.getAssociatedSection());
WriteSecHdrEntry(ShStrTabBuilder.getOffset(Section.getSectionName()),
Section.getType(),
@@ -1617,13 +1525,6 @@ uint64_t ELFObjectWriter::DataSectionSize(const MCSectionData &SD) {
return Ret;
}
-uint64_t ELFObjectWriter::GetSectionFileSize(const MCAsmLayout &Layout,
- const MCSectionData &SD) {
- if (IsELFMetaDataSection(SD))
- return DataSectionSize(SD);
- return Layout.getSectionFileSize(&SD);
-}
-
uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout,
const MCSectionData &SD) {
if (IsELFMetaDataSection(SD))
@@ -1631,14 +1532,9 @@ uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout,
return Layout.getSectionAddressSize(&SD);
}
-void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
+void ELFObjectWriter::writeDataSectionData(MCAssembler &Asm,
const MCAsmLayout &Layout,
- const MCSectionELF &Section) {
- const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
-
- uint64_t Padding = OffsetToAlignment(OS.tell(), SD.getAlignment());
- WriteZeros(Padding);
-
+ const MCSectionData &SD) {
if (IsELFMetaDataSection(SD)) {
for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
++i) {
@@ -1652,28 +1548,20 @@ void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
}
void ELFObjectWriter::writeSectionHeader(
- MCAssembler &Asm, const GroupMapTy &GroupMap, const MCAsmLayout &Layout,
- const SectionIndexMapTy &SectionIndexMap, const RelMapTy &RelMap,
+ ArrayRef<const MCSectionELF *> Sections, MCAssembler &Asm,
+ const GroupMapTy &GroupMap, const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap,
const SectionOffsetMapTy &SectionOffsetMap) {
- const unsigned NumSections = Asm.size() + 1;
-
- std::vector<const MCSectionELF*> Sections;
- Sections.resize(NumSections - 1);
-
- for (SectionIndexMapTy::const_iterator i=
- SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) {
- const std::pair<const MCSectionELF*, uint32_t> &p = *i;
- Sections[p.second - 1] = p.first;
- }
+ const unsigned NumSections = Asm.size();
// Null section first.
uint64_t FirstSectionSize =
- NumSections >= ELF::SHN_LORESERVE ? NumSections : 0;
+ (NumSections + 1) >= ELF::SHN_LORESERVE ? NumSections + 1 : 0;
uint32_t FirstSectionLink =
ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0;
WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0);
- for (unsigned i = 0; i < NumSections - 1; ++i) {
+ for (unsigned i = 0; i < NumSections; ++i) {
const MCSectionELF &Section = *Sections[i];
const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
uint32_t GroupSymbolIndex;
@@ -1685,39 +1573,9 @@ void ELFObjectWriter::writeSectionHeader(
uint64_t Size = GetSectionAddressSize(Layout, SD);
- writeSection(Asm, SectionIndexMap, RelMap, GroupSymbolIndex,
- SectionOffsetMap.lookup(&Section), Size,
- SD.getAlignment(), Section);
- }
-}
-
-void ELFObjectWriter::ComputeSectionOrder(MCAssembler &Asm,
- std::vector<const MCSectionELF*> &Sections) {
- for (MCAssembler::iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF &>(it->getSection());
- if (Section.getType() == ELF::SHT_GROUP)
- Sections.push_back(&Section);
- }
-
- for (MCAssembler::iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF &>(it->getSection());
- if (Section.getType() != ELF::SHT_GROUP &&
- Section.getType() != ELF::SHT_REL &&
- Section.getType() != ELF::SHT_RELA)
- Sections.push_back(&Section);
- }
-
- for (MCAssembler::iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF &>(it->getSection());
- if (Section.getType() == ELF::SHT_REL ||
- Section.getType() == ELF::SHT_RELA)
- Sections.push_back(&Section);
+ writeSection(Asm, SectionIndexMap, GroupSymbolIndex,
+ SectionOffsetMap.lookup(&Section), Size, SD.getAlignment(),
+ Section);
}
}
@@ -1727,102 +1585,77 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
RevGroupMapTy RevGroupMap;
SectionIndexMapTy SectionIndexMap;
- unsigned NumUserSections = Asm.size();
-
CompressDebugSections(Asm, const_cast<MCAsmLayout &>(Layout));
-
- DenseMap<const MCSectionELF*, const MCSectionELF*> RelMap;
- const unsigned NumUserAndRelocSections = Asm.size();
- createIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap,
- RevGroupMap, SectionIndexMap, RelMap);
- const unsigned AllSections = Asm.size();
- const unsigned NumIndexedSections = AllSections - NumUserAndRelocSections;
-
- unsigned NumRegularSections = NumUserSections + NumIndexedSections;
+ createIndexedSections(Asm, const_cast<MCAsmLayout &>(Layout), GroupMap,
+ RevGroupMap, SectionIndexMap);
// Compute symbol table information.
- computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap,
- NumRegularSections);
+ computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap);
- WriteRelocations(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
+ WriteRelocations(Asm, const_cast<MCAsmLayout &>(Layout));
CreateMetadataSections(const_cast<MCAssembler&>(Asm),
const_cast<MCAsmLayout&>(Layout),
SectionIndexMap);
- uint64_t NaturalAlignment = is64Bit() ? 8 : 4;
- uint64_t HeaderSize = is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
- sizeof(ELF::Elf32_Ehdr);
- uint64_t FileOff = HeaderSize;
-
+ unsigned NumSections = Asm.size();
std::vector<const MCSectionELF*> Sections;
- ComputeSectionOrder(Asm, Sections);
- unsigned NumSections = Sections.size();
- SectionOffsetMapTy SectionOffsetMap;
- for (unsigned i = 0; i < NumRegularSections + 1; ++i) {
- const MCSectionELF &Section = *Sections[i];
- const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+ Sections.resize(NumSections);
- FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
-
- // Remember the offset into the file for this section.
- SectionOffsetMap[&Section] = FileOff;
+ for (auto &Pair : SectionIndexMap)
+ Sections[Pair.second - 1] = Pair.first;
- // Get the size of the section in the output file (including padding).
- FileOff += GetSectionFileSize(Layout, SD);
- }
-
- FileOff = RoundUpToAlignment(FileOff, NaturalAlignment);
-
- const unsigned SectionHeaderOffset = FileOff - HeaderSize;
+ SectionOffsetMapTy SectionOffsetMap;
- uint64_t SectionHeaderEntrySize = is64Bit() ?
- sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr);
- FileOff += (NumSections + 1) * SectionHeaderEntrySize;
+ // Write out the ELF header ...
+ WriteHeader(Asm, NumSections + 1);
- for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) {
+ // ... then the sections ...
+ for (unsigned i = 0; i < NumSections; ++i) {
const MCSectionELF &Section = *Sections[i];
const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
-
- FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
+ uint64_t Padding = OffsetToAlignment(OS.tell(), SD.getAlignment());
+ WriteZeros(Padding);
// Remember the offset into the file for this section.
- SectionOffsetMap[&Section] = FileOff;
+ SectionOffsetMap[&Section] = OS.tell();
- // Get the size of the section in the output file (including padding).
- FileOff += GetSectionFileSize(Layout, SD);
+ writeDataSectionData(Asm, Layout, SD);
}
- // Write out the ELF header ...
- WriteHeader(Asm, SectionHeaderOffset, NumSections + 1);
-
- // ... then the regular sections ...
- // + because of .shstrtab
- for (unsigned i = 0; i < NumRegularSections + 1; ++i)
- WriteDataSectionData(Asm, Layout, *Sections[i]);
-
+ uint64_t NaturalAlignment = is64Bit() ? 8 : 4;
uint64_t Padding = OffsetToAlignment(OS.tell(), NaturalAlignment);
WriteZeros(Padding);
+ const unsigned SectionHeaderOffset = OS.tell();
+
// ... then the section header table ...
- writeSectionHeader(Asm, GroupMap, Layout, SectionIndexMap, RelMap,
+ writeSectionHeader(Sections, Asm, GroupMap, Layout, SectionIndexMap,
SectionOffsetMap);
- // ... and then the remaining sections ...
- for (unsigned i = NumRegularSections + 1; i < NumSections; ++i)
- WriteDataSectionData(Asm, Layout, *Sections[i]);
+ if (is64Bit()) {
+ uint64_t Val = SectionHeaderOffset;
+ if (sys::IsLittleEndianHost != IsLittleEndian)
+ sys::swapByteOrder(Val);
+ OS.pwrite(reinterpret_cast<char *>(&Val), sizeof(Val),
+ offsetof(ELF::Elf64_Ehdr, e_shoff));
+ } else {
+ uint32_t Val = SectionHeaderOffset;
+ if (sys::IsLittleEndianHost != IsLittleEndian)
+ sys::swapByteOrder(Val);
+ OS.pwrite(reinterpret_cast<char *>(&Val), sizeof(Val),
+ offsetof(ELF::Elf32_Ehdr, e_shoff));
+ }
}
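
The restructured WriteObject emits the ELF header with e_shoff still zero, streams the sections while recording their offsets, and only afterwards patches the real section-header offset back into the header, which is why the writer now needs a raw_pwrite_stream. A rough sketch of that write-then-backpatch pattern using standard iostreams (seekp standing in for pwrite; the header layout here is invented, not ELF):

#include <cstdint>
#include <fstream>

int main() {
  std::fstream Out("out.bin", std::ios::binary | std::ios::in |
                                  std::ios::out | std::ios::trunc);

  // 1. Write a header with a placeholder for the "table offset" field.
  const uint64_t Placeholder = 0;
  Out.write("HDR!", 4);                                    // magic
  const std::streamoff OffsetField = Out.tellp();          // where the field lives
  Out.write(reinterpret_cast<const char *>(&Placeholder), sizeof(Placeholder));

  // 2. Stream the variable-length payload; the table starts wherever it ends.
  const char Payload[] = "section data of unknown length";
  Out.write(Payload, sizeof(Payload));
  const uint64_t TableOffset = static_cast<uint64_t>(Out.tellp());
  Out.write("TBL!", 4);                                    // the "table" itself

  // 3. Seek back and patch the real offset into the header field.
  Out.seekp(OffsetField);
  Out.write(reinterpret_cast<const char *>(&TableOffset), sizeof(TableOffset));
  return 0;
}
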
-bool
-ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
- const MCSymbolData &DataA,
- const MCFragment &FB,
- bool InSet,
- bool IsPCRel) const {
- if (::isWeak(DataA))
+bool ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+ const MCAssembler &Asm, const MCSymbolData &DataA,
+ const MCSymbolData *DataB, const MCFragment &FB, bool InSet,
+ bool IsPCRel) const {
+ if (!InSet && (::isWeak(DataA) || (DataB && ::isWeak(*DataB))))
return false;
return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
- Asm, DataA, FB,InSet, IsPCRel);
+ Asm, DataA, DataB, FB, InSet, IsPCRel);
}
bool ELFObjectWriter::isWeak(const MCSymbolData &SD) const {
@@ -1830,7 +1663,7 @@ bool ELFObjectWriter::isWeak(const MCSymbolData &SD) const {
}
MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &OS,
+ raw_pwrite_stream &OS,
bool IsLittleEndian) {
return new ELFObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 62f5279..144d355 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -36,11 +36,10 @@ using namespace llvm;
namespace {
-class MCAsmStreamer : public MCStreamer {
-protected:
+class MCAsmStreamer final : public MCStreamer {
+ std::unique_ptr<formatted_raw_ostream> OSOwner;
formatted_raw_ostream &OS;
const MCAsmInfo *MAI;
-private:
std::unique_ptr<MCInstPrinter> InstPrinter;
std::unique_ptr<MCCodeEmitter> Emitter;
std::unique_ptr<MCAsmBackend> AsmBackend;
@@ -57,14 +56,15 @@ private:
void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
public:
- MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
+ MCAsmStreamer(MCContext &Context, std::unique_ptr<formatted_raw_ostream> os,
bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *printer, MCCodeEmitter *emitter,
MCAsmBackend *asmbackend, bool showInst)
- : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
- InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
- CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
- ShowInst(showInst), UseDwarfDirectory(useDwarfDirectory) {
+ : MCStreamer(Context), OSOwner(std::move(os)), OS(*OSOwner),
+ MAI(Context.getAsmInfo()), InstPrinter(printer), Emitter(emitter),
+ AsmBackend(asmbackend), CommentStream(CommentToEmit),
+ IsVerboseAsm(isVerboseAsm), ShowInst(showInst),
+ UseDwarfDirectory(useDwarfDirectory) {
if (InstPrinter && IsVerboseAsm)
InstPrinter->setCommentStream(CommentStream);
}
@@ -1262,7 +1262,7 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &S
// If we have an AsmPrinter, use that to print, otherwise print the MCInst.
if (InstPrinter)
- InstPrinter->printInst(&Inst, OS, "");
+ InstPrinter->printInst(&Inst, OS, "", STI);
else
Inst.print(OS);
EmitEOL();
@@ -1314,10 +1314,10 @@ void MCAsmStreamer::FinishImpl() {
}
MCStreamer *llvm::createAsmStreamer(MCContext &Context,
- formatted_raw_ostream &OS,
+ std::unique_ptr<formatted_raw_ostream> OS,
bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *IP, MCCodeEmitter *CE,
MCAsmBackend *MAB, bool ShowInst) {
- return new MCAsmStreamer(Context, OS, isVerboseAsm, useDwarfDirectory, IP, CE,
- MAB, ShowInst);
+ return new MCAsmStreamer(Context, std::move(OS), isVerboseAsm,
+ useDwarfDirectory, IP, CE, MAB, ShowInst);
}
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 857eafc..d09e383 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -229,8 +229,9 @@ uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
return getSectionAddressSize(SD);
}
-uint64_t MCAsmLayout::computeBundlePadding(const MCFragment *F,
- uint64_t FOffset, uint64_t FSize) {
+uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
+ const MCFragment *F,
+ uint64_t FOffset, uint64_t FSize) {
uint64_t BundleSize = Assembler.getBundleAlignSize();
assert(BundleSize > 0 &&
"computeBundlePadding should only be called if bundling is enabled");
@@ -332,6 +333,7 @@ MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) {
getFragmentList().insert(IP, F);
F->setParent(this);
}
+
return IP;
}
@@ -497,14 +499,12 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
} else {
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol &SA = A->getSymbol();
- if (A->getKind() != MCSymbolRefExpr::VK_None ||
- SA.AliasedSymbol().isUndefined()) {
+ if (A->getKind() != MCSymbolRefExpr::VK_None || SA.isUndefined()) {
IsResolved = false;
} else {
const MCSymbolData &DataA = getSymbolData(SA);
- IsResolved =
- getWriter().IsSymbolRefDifferenceFullyResolvedImpl(*this, DataA,
- *DF, false, true);
+ IsResolved = getWriter().IsSymbolRefDifferenceFullyResolvedImpl(
+ *this, DataA, nullptr, *DF, false, true);
}
}
} else {
@@ -514,12 +514,12 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
Value = Target.getConstant();
if (const MCSymbolRefExpr *A = Target.getSymA()) {
- const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ const MCSymbol &Sym = A->getSymbol();
if (Sym.isDefined())
Value += Layout.getSymbolOffset(&getSymbolData(Sym));
}
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- const MCSymbol &Sym = B->getSymbol().AliasedSymbol();
+ const MCSymbol &Sym = B->getSymbol();
if (Sym.isDefined())
Value -= Layout.getSymbolOffset(&getSymbolData(Sym));
}
@@ -634,7 +634,12 @@ void MCAsmLayout::layoutFragment(MCFragment *F) {
// The fragment's offset will point to after the padding, and its computed
// size won't include the padding.
//
- if (Assembler.isBundlingEnabled() && F->hasInstructions()) {
+ // When the -mc-relax-all flag is used, we optimize bundling by writing the
+ // bundle padding directly into fragments when the instructions are emitted
+ // inside the streamer.
+ //
+ if (Assembler.isBundlingEnabled() && !Assembler.getRelaxAll() &&
+ F->hasInstructions()) {
assert(isa<MCEncodedFragment>(F) &&
"Only MCEncodedFragment implementations have instructions");
uint64_t FSize = Assembler.computeFragmentSize(*this, *F);
@@ -642,7 +647,8 @@ void MCAsmLayout::layoutFragment(MCFragment *F) {
if (FSize > Assembler.getBundleAlignSize())
report_fatal_error("Fragment can't be larger than a bundle size");
- uint64_t RequiredBundlePadding = computeBundlePadding(F, F->Offset, FSize);
+ uint64_t RequiredBundlePadding = computeBundlePadding(Assembler, F,
+ F->Offset, FSize);
if (RequiredBundlePadding > UINT8_MAX)
report_fatal_error("Padding cannot exceed 255 bytes");
F->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding));
@@ -657,24 +663,18 @@ static void writeFragmentContents(const MCFragment &F, MCObjectWriter *OW) {
OW->WriteBytes(EF.getContents());
}
-/// \brief Write the fragment \p F to the output file.
-static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment &F) {
- MCObjectWriter *OW = &Asm.getWriter();
-
- // FIXME: Embed in fragments instead?
- uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
-
+void MCAssembler::writeFragmentPadding(const MCFragment &F, uint64_t FSize,
+ MCObjectWriter *OW) const {
// Should NOP padding be written out before this fragment?
unsigned BundlePadding = F.getBundlePadding();
if (BundlePadding > 0) {
- assert(Asm.isBundlingEnabled() &&
+ assert(isBundlingEnabled() &&
"Writing bundle padding with disabled bundling");
assert(F.hasInstructions() &&
"Writing bundle padding for a fragment without instructions");
- unsigned TotalLength = BundlePadding + static_cast<unsigned>(FragmentSize);
- if (F.alignToBundleEnd() && TotalLength > Asm.getBundleAlignSize()) {
+ unsigned TotalLength = BundlePadding + static_cast<unsigned>(FSize);
+ if (F.alignToBundleEnd() && TotalLength > getBundleAlignSize()) {
// If the padding itself crosses a bundle boundary, it must be emitted
// in 2 pieces, since even nop instructions must not cross boundaries.
// v--------------v <- BundleAlignSize
@@ -683,16 +683,27 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
// | Prev |####|####| F |
// ----------------------------
// ^-------------------^ <- TotalLength
- unsigned DistanceToBoundary = TotalLength - Asm.getBundleAlignSize();
- if (!Asm.getBackend().writeNopData(DistanceToBoundary, OW))
+ unsigned DistanceToBoundary = TotalLength - getBundleAlignSize();
+ if (!getBackend().writeNopData(DistanceToBoundary, OW))
report_fatal_error("unable to write NOP sequence of " +
Twine(DistanceToBoundary) + " bytes");
BundlePadding -= DistanceToBoundary;
}
- if (!Asm.getBackend().writeNopData(BundlePadding, OW))
+ if (!getBackend().writeNopData(BundlePadding, OW))
report_fatal_error("unable to write NOP sequence of " +
Twine(BundlePadding) + " bytes");
}
+}
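+
+When the padding plus the fragment would straddle a bundle boundary, the code above emits the NOPs in two runs, splitting at TotalLength minus the bundle size so neither run crosses the boundary. A small sketch of that arithmetic, with a made-up emitNops in place of the backend's writeNopData hook:
+
+#include <cstdio>
+
+// Pretend backend hook: emit N bytes of NOPs.
+static void emitNops(unsigned N) { std::printf("emit %u nop bytes\n", N); }
+
+// Emit Padding bytes of NOPs before a fragment of FragSize bytes, splitting
+// the padding when padding + fragment would cross a BundleSize boundary for
+// a fragment that is aligned to the end of a bundle.
+static void writePadding(unsigned Padding, unsigned FragSize,
+                         unsigned BundleSize, bool AlignToBundleEnd) {
+  unsigned TotalLength = Padding + FragSize;
+  if (AlignToBundleEnd && TotalLength > BundleSize) {
+    unsigned DistanceToBoundary = TotalLength - BundleSize;
+    emitNops(DistanceToBoundary); // first run, up to the bundle boundary
+    Padding -= DistanceToBoundary;
+  }
+  emitNops(Padding);              // remainder, now inside a single bundle
+}
+
+int main() {
+  // 16-byte bundles: 10 bytes of padding before a 12-byte fragment is
+  // emitted as 6 + 4 so neither NOP run crosses the boundary.
+  writePadding(10, 12, 16, true);
+  return 0;
+}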
+
+/// \brief Write the fragment \p F to the output file.
+static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment &F) {
+ MCObjectWriter *OW = &Asm.getWriter();
+
+ // FIXME: Embed in fragments instead?
+ uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
+
+ Asm.writeFragmentPadding(F, FragmentSize, OW);
// This variable (and its dummy usage) is to participate in the assert at
// the end of the function.
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 3cb3ea1..5f8e3c1 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -139,6 +139,11 @@ MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName,
"$frame_escape_" + Twine(Idx));
}
+MCSymbol *MCContext::getOrCreateParentFrameOffsetSymbol(StringRef FuncName) {
+ return GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + FuncName +
+ "$parent_frame_offset");
+}
+
MCSymbol *MCContext::CreateSymbol(StringRef Name, bool AlwaysAddSuffix) {
// Determine whether this is an assembler temporary or normal label, if used.
bool IsTemporary = false;
@@ -257,41 +262,63 @@ MCContext::getMachOSection(StringRef Segment, StringRef Section,
Reserved2, Kind, Begin);
}
-const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
- unsigned Flags,
- const char *BeginSymName) {
- return getELFSection(Section, Type, Flags, 0, "", BeginSymName);
-}
-
void MCContext::renameELFSection(const MCSectionELF *Section, StringRef Name) {
StringRef GroupName;
if (const MCSymbol *Group = Section->getGroup())
GroupName = Group->getName();
- ELFUniquingMap.erase(SectionGroupPair(Section->getSectionName(), GroupName));
- auto I =
- ELFUniquingMap.insert(std::make_pair(SectionGroupPair(Name, GroupName),
- Section)).first;
- StringRef CachedName = I->first.first;
+ unsigned UniqueID = Section->getUniqueID();
+ ELFUniquingMap.erase(
+ ELFSectionKey{Section->getSectionName(), GroupName, UniqueID});
+ auto I = ELFUniquingMap.insert(std::make_pair(
+ ELFSectionKey{Name, GroupName, UniqueID},
+ Section)).first;
+ StringRef CachedName = I->first.SectionName;
const_cast<MCSectionELF*>(Section)->setSectionName(CachedName);
}
+const MCSectionELF *
+MCContext::createELFRelSection(StringRef Name, unsigned Type, unsigned Flags,
+ unsigned EntrySize, const MCSymbol *Group,
+ const MCSectionELF *Associated) {
+ StringMap<bool>::iterator I;
+ bool Inserted;
+ std::tie(I, Inserted) = ELFRelSecNames.insert(std::make_pair(Name, true));
+
+ return new (*this)
+ MCSectionELF(I->getKey(), Type, Flags, SectionKind::getReadOnly(),
+ EntrySize, Group, true, nullptr, Associated);
+}
+
const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
unsigned Flags, unsigned EntrySize,
- StringRef Group, bool Unique,
+ StringRef Group, unsigned UniqueID,
const char *BeginSymName) {
+ MCSymbol *GroupSym = nullptr;
+ if (!Group.empty())
+ GroupSym = GetOrCreateSymbol(Group);
+
+ return getELFSection(Section, Type, Flags, EntrySize, GroupSym, UniqueID,
+ BeginSymName, nullptr);
+}
+
+const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
+ unsigned Flags, unsigned EntrySize,
+ const MCSymbol *GroupSym,
+ unsigned UniqueID,
+ const char *BeginSymName,
+ const MCSectionELF *Associated) {
+ StringRef Group = "";
+ if (GroupSym)
+ Group = GroupSym->getName();
// Do the lookup, if we have a hit, return it.
auto IterBool = ELFUniquingMap.insert(
- std::make_pair(SectionGroupPair(Section, Group), nullptr));
+ std::make_pair(ELFSectionKey{Section, Group, UniqueID}, nullptr));
auto &Entry = *IterBool.first;
- if (!IterBool.second && !Unique)
+ if (!IterBool.second)
return Entry.second;
- MCSymbol *GroupSym = nullptr;
- if (!Group.empty())
- GroupSym = GetOrCreateSymbol(Group);
-
- StringRef CachedName = Entry.first.first;
+ StringRef CachedName = Entry.first.SectionName;
SectionKind Kind;
if (Flags & ELF::SHF_EXECINSTR)
@@ -303,25 +330,17 @@ const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
if (BeginSymName)
Begin = createTempSymbol(BeginSymName, false);
- MCSectionELF *Result = new (*this) MCSectionELF(
- CachedName, Type, Flags, Kind, EntrySize, GroupSym, Unique, Begin);
- if (!Unique)
- Entry.second = Result;
+ MCSectionELF *Result =
+ new (*this) MCSectionELF(CachedName, Type, Flags, Kind, EntrySize,
+ GroupSym, UniqueID, Begin, Associated);
+ Entry.second = Result;
return Result;
}
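
getELFSection now keys its cache on the section name, group name, and unique ID, and uses the bool returned by the map insertion to decide whether a fresh section has to be created. The pattern in miniature, with placeholder Section and SectionKey types rather than MCContext's:

#include <cstdio>
#include <deque>
#include <map>
#include <string>
#include <tuple>

struct Section { std::string Name; };

// Key: (section name, group name, unique id). The real code hashes a
// dedicated key struct; an ordered map keyed on a tuple behaves the same.
typedef std::tuple<std::string, std::string, unsigned> SectionKey;

static std::deque<Section> Storage;              // owns every created section
static std::map<SectionKey, Section *> Uniquing; // lookup cache

static Section *getSection(const std::string &Name, const std::string &Group,
                           unsigned UniqueID) {
  SectionKey Key(Name, Group, UniqueID);
  auto IterBool = Uniquing.insert({Key, nullptr});
  if (!IterBool.second)
    return IterBool.first->second;   // hit: reuse the cached section
  Storage.push_back(Section{Name});  // miss: create it and fill the cache slot
  IterBool.first->second = &Storage.back();
  return IterBool.first->second;
}

int main() {
  Section *A = getSection(".text", "", 0);
  Section *B = getSection(".text", "", 0); // same key -> same object
  Section *C = getSection(".text", "", 1); // different unique id -> new object
  std::printf("%d %d\n", A == B, A == C);  // prints "1 0"
  return 0;
}
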
-const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
- unsigned Flags, unsigned EntrySize,
- StringRef Group,
- const char *BeginSymName) {
- return getELFSection(Section, Type, Flags, EntrySize, Group, false,
- BeginSymName);
-}
-
const MCSectionELF *MCContext::CreateELFGroupSection() {
MCSectionELF *Result = new (*this)
MCSectionELF(".group", ELF::SHT_GROUP, 0, SectionKind::getReadOnly(), 4,
- nullptr, false, nullptr);
+ nullptr, ~0, nullptr, nullptr);
return Result;
}
@@ -329,23 +348,24 @@ const MCSectionCOFF *
MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
SectionKind Kind, StringRef COMDATSymName,
int Selection, const char *BeginSymName) {
- // Do the lookup, if we have a hit, return it.
+ MCSymbol *COMDATSymbol = nullptr;
+ if (!COMDATSymName.empty()) {
+ COMDATSymbol = GetOrCreateSymbol(COMDATSymName);
+ COMDATSymName = COMDATSymbol->getName();
+ }
- SectionGroupTriple T(Section, COMDATSymName, Selection);
+ // Do the lookup, if we have a hit, return it.
+ COFFSectionKey T{Section, COMDATSymName, Selection};
auto IterBool = COFFUniquingMap.insert(std::make_pair(T, nullptr));
auto Iter = IterBool.first;
if (!IterBool.second)
return Iter->second;
- MCSymbol *COMDATSymbol = nullptr;
- if (!COMDATSymName.empty())
- COMDATSymbol = GetOrCreateSymbol(COMDATSymName);
-
MCSymbol *Begin = nullptr;
if (BeginSymName)
Begin = createTempSymbol(BeginSymName, false);
- StringRef CachedName = std::get<0>(Iter->first);
+ StringRef CachedName = Iter->first.SectionName;
MCSectionCOFF *Result = new (*this) MCSectionCOFF(
CachedName, Characteristics, COMDATSymbol, Selection, Kind, Begin);
@@ -361,7 +381,7 @@ const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
}
const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) {
- SectionGroupTriple T(Section, "", 0);
+ COFFSectionKey T{Section, "", 0};
auto Iter = COFFUniquingMap.find(T);
if (Iter == COFFUniquingMap.end())
return nullptr;
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index d9f01d0..716d76a 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -33,22 +33,22 @@ using namespace llvm;
// disassembler context. If not, it returns NULL.
//
LLVMDisasmContextRef
-LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU,
+LLVMCreateDisasmCPUFeatures(const char *TT, const char *CPU,
const char *Features, void *DisInfo, int TagType,
LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp) {
// Get the target.
std::string Error;
- const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
+ const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error);
if (!TheTarget)
return nullptr;
- const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple);
+ const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TT);
if (!MRI)
return nullptr;
// Get the assembler info needed to setup the MCContext.
- const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, Triple);
+ const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, TT);
if (!MAI)
return nullptr;
@@ -56,8 +56,8 @@ LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU,
if (!MII)
return nullptr;
- const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU,
- Features);
+ const MCSubtargetInfo *STI =
+ TheTarget->createMCSubtargetInfo(TT, CPU, Features);
if (!STI)
return nullptr;
@@ -72,25 +72,24 @@ LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU,
return nullptr;
std::unique_ptr<MCRelocationInfo> RelInfo(
- TheTarget->createMCRelocationInfo(Triple, *Ctx));
+ TheTarget->createMCRelocationInfo(TT, *Ctx));
if (!RelInfo)
return nullptr;
std::unique_ptr<MCSymbolizer> Symbolizer(TheTarget->createMCSymbolizer(
- Triple, GetOpInfo, SymbolLookUp, DisInfo, Ctx, std::move(RelInfo)));
+ TT, GetOpInfo, SymbolLookUp, DisInfo, Ctx, std::move(RelInfo)));
DisAsm->setSymbolizer(std::move(Symbolizer));
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
- MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
- *MAI, *MII, *MRI, *STI);
+ MCInstPrinter *IP = TheTarget->createMCInstPrinter(
+ Triple(TT), AsmPrinterVariant, *MAI, *MII, *MRI);
if (!IP)
return nullptr;
- LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType,
- GetOpInfo, SymbolLookUp,
- TheTarget, MAI, MRI,
- STI, MII, Ctx, DisAsm, IP);
+ LLVMDisasmContext *DC =
+ new LLVMDisasmContext(TT, DisInfo, TagType, GetOpInfo, SymbolLookUp,
+ TheTarget, MAI, MRI, STI, MII, Ctx, DisAsm, IP);
if (!DC)
return nullptr;
@@ -98,19 +97,19 @@ LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU,
return DC;
}
-LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
- void *DisInfo, int TagType,
- LLVMOpInfoCallback GetOpInfo,
- LLVMSymbolLookupCallback SymbolLookUp){
- return LLVMCreateDisasmCPUFeatures(Triple, CPU, "", DisInfo, TagType,
- GetOpInfo, SymbolLookUp);
+LLVMDisasmContextRef
+LLVMCreateDisasmCPU(const char *TT, const char *CPU, void *DisInfo, int TagType,
+ LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp) {
+ return LLVMCreateDisasmCPUFeatures(TT, CPU, "", DisInfo, TagType, GetOpInfo,
+ SymbolLookUp);
}
-LLVMDisasmContextRef LLVMCreateDisasm(const char *Triple, void *DisInfo,
+LLVMDisasmContextRef LLVMCreateDisasm(const char *TT, void *DisInfo,
int TagType, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp) {
- return LLVMCreateDisasmCPUFeatures(Triple, "", "", DisInfo, TagType,
- GetOpInfo, SymbolLookUp);
+ return LLVMCreateDisasmCPUFeatures(TT, "", "", DisInfo, TagType, GetOpInfo,
+ SymbolLookUp);
}
//
@@ -268,7 +267,7 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
SmallVector<char, 64> InsnStr;
raw_svector_ostream OS(InsnStr);
formatted_raw_ostream FormattedOS(OS);
- IP->printInst(&Inst, FormattedOS, AnnotationsStr);
+ IP->printInst(&Inst, FormattedOS, AnnotationsStr, *DC->getSubtargetInfo());
if (DC->getOptions() & LLVMDisassembler_Option_PrintLatency)
emitLatency(DC, Inst);
@@ -312,11 +311,10 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){
const MCAsmInfo *MAI = DC->getAsmInfo();
const MCInstrInfo *MII = DC->getInstrInfo();
const MCRegisterInfo *MRI = DC->getRegisterInfo();
- const MCSubtargetInfo *STI = DC->getSubtargetInfo();
int AsmPrinterVariant = MAI->getAssemblerDialect();
AsmPrinterVariant = AsmPrinterVariant == 0 ? 1 : 0;
MCInstPrinter *IP = DC->getTarget()->createMCInstPrinter(
- AsmPrinterVariant, *MAI, *MII, *MRI, *STI);
+ Triple(DC->getTripleName()), AsmPrinterVariant, *MAI, *MII, *MRI);
if (IP) {
DC->setIP(IP);
DC->addOptions(LLVMDisassembler_Option_AsmPrinterVariant);
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 87e7ed1..e9f685e 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -803,7 +803,7 @@ static void EmitGenDwarfRanges(MCStreamer *MCOS) {
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection());
- for (const auto sec : Sections) {
+ for (const auto &sec : Sections) {
MCSymbol *StartSymbol = sec.second.first;
MCSymbol *EndSymbol = sec.second.second;
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index 84176dc..dc3d6c3 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -28,3 +28,24 @@ bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const {
return false;
}
+
+// ELF doesn't require relocations to be in any order. We sort by the Offset,
+// just to match gnu as for easier comparison. Using the relocation type as a
+// tie-breaker is an arbitrary way of making the sort deterministic.
+static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) {
+ const ELFRelocationEntry &A = *AP;
+ const ELFRelocationEntry &B = *BP;
+ if (A.Offset != B.Offset)
+ return B.Offset - A.Offset;
+ if (B.Type != A.Type)
+ return A.Type - B.Type;
+ //llvm_unreachable("ELFRelocs might be unstable!");
+ return 0;
+}
+
+
+void
+MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+ array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel);
+}
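
The sortRelocs hook above gives targets a deterministic relocation order via a qsort-style comparator. A standalone sketch of the same two-key sort in predicate form (offset as the primary key, type as the tie-breaker); whether larger or smaller offsets come first only matters for matching the reference assembler's output:

#include <algorithm>
#include <cstdint>
#include <vector>

struct Reloc {
  uint64_t Offset;
  unsigned Type;
};

// Deterministic ordering: offset is the primary key, the relocation type is
// an arbitrary tie-breaker. A predicate also sidesteps the overflow risk of
// subtraction-based comparators on 64-bit offsets.
void sortRelocs(std::vector<Reloc> &Relocs) {
  std::stable_sort(Relocs.begin(), Relocs.end(),
                   [](const Reloc &A, const Reloc &B) {
                     if (A.Offset != B.Offset)
                       return A.Offset < B.Offset;
                     return A.Type < B.Type;
                   });
}
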
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index cdf5033..aa05390 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -40,6 +41,48 @@ using namespace llvm;
MCELFStreamer::~MCELFStreamer() {
}
+void MCELFStreamer::mergeFragment(MCDataFragment *DF,
+ MCEncodedFragmentWithFixups *EF) {
+ MCAssembler &Assembler = getAssembler();
+
+ if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) {
+ uint64_t FSize = EF->getContents().size();
+
+ if (FSize > Assembler.getBundleAlignSize())
+ report_fatal_error("Fragment can't be larger than a bundle size");
+
+ uint64_t RequiredBundlePadding = computeBundlePadding(
+ Assembler, EF, DF->getContents().size(), FSize);
+
+ if (RequiredBundlePadding > UINT8_MAX)
+ report_fatal_error("Padding cannot exceed 255 bytes");
+
+ if (RequiredBundlePadding > 0) {
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ MCObjectWriter *OW = Assembler.getBackend().createObjectWriter(VecOS);
+
+ EF->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding));
+
+ Assembler.writeFragmentPadding(*EF, FSize, OW);
+ VecOS.flush();
+ delete OW;
+
+ DF->getContents().append(Code.begin(), Code.end());
+ }
+ }
+
+ flushPendingLabels(DF, DF->getContents().size());
+
+ for (unsigned i = 0, e = EF->getFixups().size(); i != e; ++i) {
+ EF->getFixups()[i].setOffset(EF->getFixups()[i].getOffset() +
+ DF->getContents().size());
+ DF->getFixups().push_back(EF->getFixups()[i]);
+ }
+ DF->setHasInstructions(true);
+ DF->getContents().append(EF->getContents().begin(), EF->getContents().end());
+}
+
void MCELFStreamer::InitSections(bool NoExecStack) {
// This emulates the same behavior of GNU as. This makes it easier
// to compare the output as the major sections are in the same order.
@@ -449,7 +492,16 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
if (Assembler.isBundlingEnabled()) {
MCSectionData *SD = getCurrentSectionData();
- if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst())
+ if (Assembler.getRelaxAll() && SD->isBundleLocked())
+ // If the -mc-relax-all flag is used and we are bundle-locked, we re-use
+ // the current bundle group.
+ DF = BundleGroups.back();
+ else if (Assembler.getRelaxAll() && !SD->isBundleLocked())
+ // When not in a bundle-locked group and the -mc-relax-all flag is used,
+ // we create a new temporary fragment which will be later merged into
+ // the current fragment.
+ DF = new MCDataFragment();
+ else if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst())
// If we are bundle-locked, we re-use the current fragment.
// The bundle-locking directive ensures this is a new data fragment.
DF = cast<MCDataFragment>(getCurrentFragment());
@@ -487,6 +539,14 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
}
DF->setHasInstructions(true);
DF->getContents().append(Code.begin(), Code.end());
+
+ if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) {
+ MCSectionData *SD = getCurrentSectionData();
+ if (!SD->isBundleLocked()) {
+ mergeFragment(getOrCreateDataFragment(), DF);
+ delete DF;
+ }
+ }
}
void MCELFStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
@@ -510,6 +570,12 @@ void MCELFStreamer::EmitBundleLock(bool AlignToEnd) {
if (!SD->isBundleLocked())
SD->setBundleGroupBeforeFirstInst(true);
+ if (getAssembler().getRelaxAll() && !SD->isBundleLocked()) {
+ // TODO: drop the lock state and set directly in the fragment
+ MCDataFragment *DF = new MCDataFragment();
+ BundleGroups.push_back(DF);
+ }
+
SD->setBundleLockState(AlignToEnd ? MCSectionData::BundleLockedAlignToEnd :
MCSectionData::BundleLocked);
}
@@ -525,7 +591,27 @@ void MCELFStreamer::EmitBundleUnlock() {
else if (SD->isBundleGroupBeforeFirstInst())
report_fatal_error("Empty bundle-locked group is forbidden");
- SD->setBundleLockState(MCSectionData::NotBundleLocked);
+ // When the -mc-relax-all flag is used, we emit instructions to fragments
+ // stored on a stack. When the bundle unlock is emitted, we pop a fragment
+ // from the stack and merge it into the one below.
+ if (getAssembler().getRelaxAll()) {
+ assert(!BundleGroups.empty() && "There are no bundle groups");
+ MCDataFragment *DF = BundleGroups.back();
+
+ // FIXME: Use BundleGroups to track the lock state instead.
+ SD->setBundleLockState(MCSectionData::NotBundleLocked);
+
+ // FIXME: Use more separate fragments for nested groups.
+ if (!SD->isBundleLocked()) {
+ mergeFragment(getOrCreateDataFragment(), DF);
+ BundleGroups.pop_back();
+ delete DF;
+ }
+
+ if (SD->getBundleLockState() != MCSectionData::BundleLockedAlignToEnd)
+ getOrCreateDataFragment()->setAlignToBundleEnd(false);
+ } else
+ SD->setBundleLockState(MCSectionData::NotBundleLocked);
}
void MCELFStreamer::Flush() {
@@ -561,7 +647,7 @@ void MCELFStreamer::FinishImpl() {
}
MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *CE,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE,
bool RelaxAll) {
MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE);
if (RelaxAll)
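
mergeFragment above relies on computeBundlePadding to keep an instruction from straddling a bundle boundary when -mc-relax-all is combined with bundling. A minimal sketch of that padding rule (illustrative only; the real helper also handles align-to-bundle-end groups):

#include <cstdint>

// If appending Size bytes at Offset would cross a BundleSize boundary, pad
// up to the next boundary first; otherwise no padding is needed.
uint64_t requiredBundlePadding(uint64_t Offset, uint64_t Size,
                               uint64_t BundleSize) {
  uint64_t OffsetInBundle = Offset % BundleSize;
  if (OffsetInBundle + Size > BundleSize) // would straddle a boundary
    return BundleSize - OffsetInBundle;   // pad to the next bundle start
  return 0;
}

For example, with 32-byte bundles a 6-byte instruction at offset 30 needs 2 bytes of padding, which is what the merged fragment receives before the instruction bytes are appended.
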
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 8a64403..0702539 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -775,6 +775,10 @@ const MCSection *MCExpr::FindAssociatedSection() const {
if (RHS_S == MCSymbol::AbsolutePseudoSection)
return LHS_S;
+ // Not always correct, but probably the best we can do without more context.
+ if (BE->getOpcode() == MCBinaryExpr::Sub)
+ return MCSymbol::AbsolutePseudoSection;
+
// Otherwise, return the first non-null section.
return LHS_S ? LHS_S : RHS_S;
}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index d5c7101..5c78f5f 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -54,7 +54,7 @@ private:
void EmitDataRegionEnd();
public:
- MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
+ MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool DWARFMustBeAtTheEnd, bool label)
: MCObjectStreamer(Context, MAB, OS, Emitter), LabelSections(label),
DWARFMustBeAtTheEnd(DWARFMustBeAtTheEnd), CreatedADWARFSection(false) {}
@@ -491,7 +491,7 @@ void MCMachOStreamer::FinishImpl() {
}
MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *CE,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE,
bool RelaxAll, bool DWARFMustBeAtTheEnd,
bool LabelSections) {
MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE,
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 6aa2de3..d254e95 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -24,18 +24,13 @@
using namespace llvm;
MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter_)
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter_)
: MCStreamer(Context),
Assembler(new MCAssembler(Context, TAB, *Emitter_,
*TAB.createObjectWriter(OS), OS)),
CurSectionData(nullptr), EmitEHFrame(true), EmitDebugFrame(false) {}
-MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter_,
- MCAssembler *Assembler)
- : MCStreamer(Context), Assembler(Assembler), CurSectionData(nullptr),
- EmitEHFrame(true), EmitDebugFrame(false) {}
-
MCObjectStreamer::~MCObjectStreamer() {
delete &Assembler->getBackend();
delete &Assembler->getEmitter();
@@ -43,7 +38,7 @@ MCObjectStreamer::~MCObjectStreamer() {
delete Assembler;
}
-void MCObjectStreamer::flushPendingLabels(MCFragment *F) {
+void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
if (PendingLabels.size()) {
if (!F) {
F = new MCDataFragment();
@@ -52,7 +47,7 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F) {
}
for (MCSymbolData *SD : PendingLabels) {
SD->setFragment(F);
- SD->setOffset(0);
+ SD->setOffset(FOffset);
}
PendingLabels.clear();
}
@@ -93,7 +88,8 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() {
MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
// When bundling is enabled, we don't want to add data to a fragment that
// already has instructions (see MCELFStreamer::EmitInstToData for details)
- if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) {
+ if (!F || (Assembler->isBundlingEnabled() && !Assembler->getRelaxAll() &&
+ F->hasInstructions())) {
F = new MCDataFragment();
insert(F);
}
@@ -149,7 +145,9 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
// If there is a current fragment, mark the symbol as pointing into it.
// Otherwise queue the label and set its fragment pointer when we emit the
// next fragment.
- if (auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment())) {
+ auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (F && !(getAssembler().isBundlingEnabled() &&
+ getAssembler().getRelaxAll())) {
SD.setFragment(F);
SD.setOffset(F->getContents().size());
} else {
@@ -248,6 +246,9 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst,
const MCSubtargetInfo &STI) {
+ if (getAssembler().getRelaxAll() && getAssembler().isBundlingEnabled())
+ llvm_unreachable("All instructions should have already been relaxed");
+
// Always create a new, separate fragment here, because its size can change
// during relaxation.
MCRelaxableFragment *IF = new MCRelaxableFragment(Inst, STI);
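
The flushPendingLabels change above binds queued labels at the offset where emission resumes instead of always at offset 0. A small sketch of that bookkeeping with illustrative stand-in types:

#include <cstdint>
#include <string>
#include <vector>

struct Fragment {
  std::vector<uint8_t> Contents;
};

struct Label {
  std::string Name;
  Fragment *F = nullptr;
  uint64_t Offset = 0;
};

// Labels emitted before any data are queued; once a fragment exists they are
// bound to it at FOffset, the position where the next bytes will land.
void flushPendingLabels(std::vector<Label *> &Pending, Fragment &F,
                        uint64_t FOffset) {
  for (Label *L : Pending) {
    L->F = &F;
    L->Offset = FOffset;
  }
  Pending.clear();
}
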
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
index 3c536ec..e40c07d 100644
--- a/lib/MC/MCObjectWriter.cpp
+++ b/lib/MC/MCObjectWriter.cpp
@@ -27,7 +27,7 @@ bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved(
const MCSymbol &SA = A->getSymbol();
const MCSymbol &SB = B->getSymbol();
- if (SA.AliasedSymbol().isUndefined() || SB.AliasedSymbol().isUndefined())
+ if (SA.isUndefined() || SB.isUndefined())
return false;
const MCSymbolData &DataA = Asm.getSymbolData(SA);
@@ -35,19 +35,15 @@ bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved(
if(!DataA.getFragment() || !DataB.getFragment())
return false;
- return IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA,
- *DataB.getFragment(),
- InSet,
- false);
+ return IsSymbolRefDifferenceFullyResolvedImpl(
+ Asm, DataA, &DataB, *DataB.getFragment(), InSet, false);
}
-bool
-MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
- const MCSymbolData &DataA,
- const MCFragment &FB,
- bool InSet,
- bool IsPCRel) const {
- const MCSection &SecA = DataA.getSymbol().AliasedSymbol().getSection();
+bool MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+ const MCAssembler &Asm, const MCSymbolData &DataA,
+ const MCSymbolData *DataB, const MCFragment &FB, bool InSet,
+ bool IsPCRel) const {
+ const MCSection &SecA = DataA.getSymbol().getSection();
const MCSection &SecB = FB.getParent()->getSection();
// On ELF and COFF A - B is absolute if A and B are in the same section.
return &SecA == &SecB;
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 2bf980b..92a7507 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -175,7 +175,7 @@ private:
public:
AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI);
- virtual ~AsmParser();
+ ~AsmParser() override;
bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
@@ -4606,7 +4606,7 @@ bool AsmParser::parseMSInlineAsm(
++InputIdx;
OutputDecls.push_back(OpDecl);
OutputDeclsAddressOf.push_back(Operand.needAddressOf());
- OutputConstraints.push_back('=' + Operand.getConstraint().str());
+ OutputConstraints.push_back(("=" + Operand.getConstraint()).str());
AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size()));
} else {
InputDecls.push_back(OpDecl);
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 7a120a1..a19339d 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -379,7 +379,7 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
const MCExpr *Subsection = nullptr;
bool UseLastGroup = false;
StringRef UniqueStr;
- bool Unique = false;
+ int64_t UniqueID = ~0;
// Set the defaults first.
if (SectionName == ".fini" || SectionName == ".init" ||
@@ -470,7 +470,15 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
return TokError("expected identifier in directive");
if (UniqueStr != "unique")
return TokError("expected 'unique'");
- Unique = true;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected comma");
+ Lex();
+ if (getParser().parseAbsoluteExpression(UniqueID))
+ return true;
+ if (UniqueID < 0)
+ return TokError("unique id must be positive");
+ if (!isUInt<32>(UniqueID) || UniqueID == ~0U)
+ return TokError("unique id is too large");
}
}
}
@@ -520,7 +528,7 @@ EndStmt:
}
const MCSection *ELFSection = getContext().getELFSection(
- SectionName, Type, Flags, Size, GroupName, Unique);
+ SectionName, Type, Flags, Size, GroupName, UniqueID);
getStreamer().SwitchSection(ELFSection, Subsection);
if (getContext().getGenDwarfForAssembly()) {
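
The parser change above accepts a trailing "unique, <id>" on the .section directive and rejects ids that are negative, wider than 32 bits, or equal to ~0U, the "not unique" default used elsewhere in the patch. A standalone sketch of that validation:

#include <cstdint>
#include <string>

// Reject ids outside [0, UINT32_MAX): negative values, values wider than
// 32 bits, and ~0U itself, which is the "not unique" default seen above.
bool validateUniqueID(int64_t UniqueID, std::string &Err) {
  if (UniqueID < 0) {
    Err = "unique id must be positive";
    return false;
  }
  if (UniqueID >= 0xFFFFFFFFLL) {
    Err = "unique id is too large";
    return false;
  }
  return true;
}
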
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index da38682..3cd8453 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -24,7 +24,7 @@ MCSectionELF::~MCSectionELF() {} // anchor.
bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
const MCAsmInfo &MAI) const {
- if (Unique)
+ if (isUnique())
return false;
// FIXME: Does .section .bss/.data/.text work everywhere??
@@ -148,8 +148,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << ",comdat";
}
- if (Unique)
- OS << ",unique";
+ if (isUnique())
+ OS << ",unique," << UniqueID;
OS << '\n';
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index ca3894b..daba321 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -93,9 +93,10 @@ MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
const SubtargetInfoKV *Found =
std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, CPU);
if (Found == ProcSchedModels+NumProcs || StringRef(Found->Key) != CPU) {
- errs() << "'" << CPU
- << "' is not a recognized processor for this target"
- << " (ignoring processor)\n";
+ if (CPU != "help") // Don't error if the user asked for help.
+ errs() << "'" << CPU
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
return MCSchedModel::GetDefaultSchedModel();
}
assert(Found->Value && "Missing processor SchedModel value");
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index 2416525..6582574 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -55,13 +55,7 @@ void MCSymbol::setVariableValue(const MCExpr *Value) {
assert(!IsUsed && "Cannot set a variable that has already been used.");
assert(Value && "Invalid variable value!");
this->Value = Value;
-
- // Variables should always be marked as in the same "section" as the value.
- const MCSection *Section = Value->FindAssociatedSection();
- if (Section)
- setSection(*Section);
- else
- setUndefined();
+ this->Section = nullptr;
}
void MCSymbol::print(raw_ostream &OS) const {
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 5e9e86f..837f585 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -649,38 +649,18 @@ void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
}
}
-void MachObjectWriter::markAbsoluteVariableSymbols(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- for (MCSymbolData &SD : Asm.symbols()) {
- if (!SD.getSymbol().isVariable())
- continue;
-
- // Is the variable is a symbol difference (SA - SB + C) expression,
- // and neither symbol is external, mark the variable as absolute.
- const MCExpr *Expr = SD.getSymbol().getVariableValue();
- MCValue Value;
- if (Expr->EvaluateAsRelocatable(Value, &Layout, nullptr)) {
- if (Value.getSymA() && Value.getSymB())
- const_cast<MCSymbol*>(&SD.getSymbol())->setAbsolute();
- }
- }
-}
-
void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
computeSectionAddresses(Asm, Layout);
// Create symbol data for any indirect symbols.
BindIndirectSymbols(Asm);
-
- // Mark symbol difference expressions in variables (from .set or = directives)
- // as absolute.
- markAbsoluteVariableSymbols(Asm, Layout);
}
bool MachObjectWriter::
IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbolData &DataA,
+ const MCSymbolData *DataB,
const MCFragment &FB,
bool InSet,
bool IsPCRel) const {
@@ -1027,7 +1007,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
}
MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
- raw_ostream &OS,
+ raw_pwrite_stream &OS,
bool IsLittleEndian) {
return new MachObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index ec6c9cb..b600baf 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -81,11 +81,12 @@ static std::string Join(const std::vector<std::string> &V) {
}
/// Adding features.
-void SubtargetFeatures::AddFeature(StringRef String) {
- // Don't add empty features or features we already have.
+void SubtargetFeatures::AddFeature(StringRef String, bool Enable) {
+ // Don't add empty features.
if (!String.empty())
// Convert to lowercase, prepend flag if we don't already have a flag.
- Features.push_back(hasFlag(String) ? String.str() : "+" + String.lower());
+ Features.push_back(hasFlag(String) ? String.lower()
+ : (Enable ? "+" : "-") + String.lower());
}
/// Find KV in array using binary search.
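
AddFeature above now takes an Enable flag and prepends "+" or "-" when the caller did not already supply one, always lower-casing the name. A standalone sketch of that behaviour:

#include <cctype>
#include <string>
#include <vector>

// Collect features as lowercase strings with an explicit "+" (enable) or
// "-" (disable) prefix, unless the caller already provided a prefix.
void addFeature(std::vector<std::string> &Features, std::string Name,
                bool Enable = true) {
  if (Name.empty())
    return;
  bool HasFlag = Name[0] == '+' || Name[0] == '-';
  for (char &C : Name)
    C = static_cast<char>(std::tolower(static_cast<unsigned char>(C)));
  if (!HasFlag)
    Name.insert(Name.begin(), Enable ? '+' : '-');
  Features.push_back(Name);
}

With this, addFeature(F, "AVX2") yields "+avx2", while addFeature(F, "SSE4.1", false) yields "-sse4.1".
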
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index c6bc81d..38bb883 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -126,8 +126,8 @@ public:
bool UseBigObj;
- WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_ostream &OS);
-
+ WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_pwrite_stream &OS);
+
void reset() override {
memset(&Header, 0, sizeof(Header));
Header.Machine = TargetObjectWriter->getMachine();
@@ -172,6 +172,7 @@ public:
bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbolData &DataA,
+ const MCSymbolData *DataB,
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;
@@ -257,7 +258,7 @@ size_t COFFSection::size() {
// WinCOFFObjectWriter class implementation
WinCOFFObjectWriter::WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
- raw_ostream &OS)
+ raw_pwrite_stream &OS)
: MCObjectWriter(OS, true), TargetObjectWriter(MOTW) {
memset(&Header, 0, sizeof(Header));
@@ -382,9 +383,7 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
coff_symbol->Other = GetOrCreateCOFFSymbol(&SymRef->getSymbol());
} else {
- std::string WeakName = std::string(".weak.")
- + Symbol.getName().str()
- + ".default";
+ std::string WeakName = (".weak." + Symbol.getName() + ".default").str();
COFFSymbol *WeakDefault = createSymbol(WeakName);
WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
WeakDefault->Data.StorageClass = COFF::IMAGE_SYM_CLASS_EXTERNAL;
@@ -651,16 +650,17 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
}
bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
- const MCAssembler &Asm, const MCSymbolData &DataA, const MCFragment &FB,
- bool InSet, bool IsPCRel) const {
+ const MCAssembler &Asm, const MCSymbolData &DataA,
+ const MCSymbolData *DataB, const MCFragment &FB, bool InSet,
+ bool IsPCRel) const {
// MS LINK expects to be able to replace all references to a function with a
// thunk to implement their /INCREMENTAL feature. Make sure we don't optimize
// away any relocations to functions.
if ((((DataA.getFlags() & COFF::SF_TypeMask) >> COFF::SF_TypeShift) >>
COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION)
return false;
- return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA, FB,
- InSet, IsPCRel);
+ return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+ Asm, DataA, DataB, FB, InSet, IsPCRel);
}
bool WinCOFFObjectWriter::isWeak(const MCSymbolData &SD) const {
@@ -1073,9 +1073,8 @@ void MCWinCOFFObjectTargetWriter::anchor() {}
//------------------------------------------------------------------------------
// WinCOFFObjectWriter factory function
-namespace llvm {
- MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
- raw_ostream &OS) {
- return new WinCOFFObjectWriter(MOTW, OS);
- }
+MCObjectWriter *
+llvm::createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
+ raw_pwrite_stream &OS) {
+ return new WinCOFFObjectWriter(MOTW, OS);
}
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index f902d2b..8f9aacb 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -39,7 +39,7 @@ using namespace llvm;
namespace llvm {
MCWinCOFFStreamer::MCWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- MCCodeEmitter &CE, raw_ostream &OS)
+ MCCodeEmitter &CE, raw_pwrite_stream &OS)
: MCObjectStreamer(Context, MAB, OS, &CE), CurSymbol(nullptr) {}
void MCWinCOFFStreamer::EmitInstToData(const MCInst &Inst,
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index ad278a4..4c38b8f 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -262,7 +262,7 @@ std::error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref,
}
const section_iterator SecEnd = section_end();
uint64_t AfterAddr = UnknownAddressOrSize;
- for (const symbol_iterator &SymbI : symbols()) {
+ for (const symbol_iterator SymbI : symbols()) {
section_iterator SecI = SecEnd;
if (std::error_code EC = SymbI->getSection(SecI))
return EC;
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 4a1c311..7129aa3 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -1537,7 +1537,7 @@ bool ExportEntry::operator==(const ExportEntry &Other) const {
if (Stack.size() != Other.Stack.size())
return false;
// Not equal if different cumulative strings.
- if (!CumulativeString.str().equals(Other.CumulativeString.str()))
+ if (!CumulativeString.equals(Other.CumulativeString))
return false;
// Equal if all nodes in both stacks match.
for (unsigned i=0; i < Stack.size(); ++i) {
@@ -1559,7 +1559,7 @@ uint64_t ExportEntry::readULEB128(const uint8_t *&Ptr) {
}
StringRef ExportEntry::name() const {
- return CumulativeString.str();
+ return CumulativeString;
}
uint64_t ExportEntry::flags() const {
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
index 4bc8f92..b771a18 100644
--- a/lib/Option/ArgList.cpp
+++ b/lib/Option/ArgList.cpp
@@ -395,7 +395,7 @@ Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt,
Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt,
StringRef Value) const {
- unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str());
+ unsigned Index = BaseArgs.MakeIndex((Opt.getName() + Value).str());
SynthesizedArgs.push_back(make_unique<Arg>(
Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index,
BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg));
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 2533fa0..228f75e 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -162,7 +162,7 @@ APInt& APInt::operator=(uint64_t RHS) {
return clearUnusedBits();
}
-/// Profile - This method 'profiles' an APInt for use with FoldingSet.
+/// This method 'profiles' an APInt for use with FoldingSet.
void APInt::Profile(FoldingSetNodeID& ID) const {
ID.AddInteger(BitWidth);
@@ -176,7 +176,7 @@ void APInt::Profile(FoldingSetNodeID& ID) const {
ID.AddInteger(pVal[i]);
}
-/// add_1 - This function adds a single "digit" integer, y, to the multiple
+/// This function adds a single "digit" integer, y, to the multiple
/// "digit" integer array, x[]. x[] is modified to reflect the addition and
/// 1 is returned if there is a carry out, otherwise 0 is returned.
/// @returns the carry of the addition.
@@ -202,7 +202,7 @@ APInt& APInt::operator++() {
return clearUnusedBits();
}
-/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
+/// This function subtracts a single "digit" (64-bit word), y, from
/// the multi-digit integer array, x[], propagating the borrowed 1 value until
/// no further borrowing is needed or it runs out of "digits" in x. The result
/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
@@ -231,7 +231,7 @@ APInt& APInt::operator--() {
return clearUnusedBits();
}
-/// add - This function adds the integer array x to the integer array Y and
+/// This function adds the integer array x to the integer array Y and
/// places the result in dest.
/// @returns the carry out from the addition
/// @brief General addition of 64-bit integer arrays
@@ -680,12 +680,12 @@ bool APInt::isSplat(unsigned SplatSizeInBits) const {
return *this == rotl(SplatSizeInBits);
}
-/// HiBits - This function returns the high "numBits" bits of this APInt.
+/// This function returns the high "numBits" bits of this APInt.
APInt APInt::getHiBits(unsigned numBits) const {
return APIntOps::lshr(*this, BitWidth - numBits);
}
-/// LoBits - This function returns the low "numBits" bits of this APInt.
+/// This function returns the low "numBits" bits of this APInt.
APInt APInt::getLoBits(unsigned numBits) const {
return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
BitWidth - numBits);
@@ -861,7 +861,7 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
return isNeg ? -Tmp : Tmp;
}
-/// RoundToDouble - This function converts this APInt to a double.
+/// This function converts this APInt to a double.
/// The layout for double is as following (IEEE Standard 754):
/// --------------------------------------
/// | Sign Exponent Fraction Bias |
@@ -2269,9 +2269,8 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
std::reverse(Str.begin()+StartDig, Str.end());
}
-/// toString - This returns the APInt as a std::string. Note that this is an
-/// inefficient method. It is better to pass in a SmallVector/SmallString
-/// to the methods above.
+/// Returns the APInt as a std::string. Note that this is an inefficient method.
+/// It is better to pass in a SmallVector/SmallString to the methods above.
std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
SmallString<40> S;
toString(S, Radix, Signed, /* formatAsCLiteral = */false);
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index af6c605..3cabc54 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -313,7 +313,7 @@ static Option *LookupNearestOption(StringRef Arg,
if (RHS.empty() || !PermitValue)
NearestString = OptionNames[i];
else
- NearestString = std::string(OptionNames[i]) + "=" + RHS.str();
+ NearestString = (Twine(OptionNames[i]) + "=" + RHS).str();
}
}
}
@@ -784,7 +784,7 @@ class StrDupSaver : public StringSaver {
std::vector<char *> Dups;
public:
- ~StrDupSaver() {
+ ~StrDupSaver() override {
for (std::vector<char *>::iterator I = Dups.begin(), E = Dups.end(); I != E;
++I) {
char *Dup = *I;
diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp
index a44b958..c243155 100644
--- a/lib/Support/DataStream.cpp
+++ b/lib/Support/DataStream.cpp
@@ -54,9 +54,7 @@ class DataFileStreamer : public DataStreamer {
int Fd;
public:
DataFileStreamer() : Fd(0) {}
- virtual ~DataFileStreamer() {
- close(Fd);
- }
+ ~DataFileStreamer() override { close(Fd); }
size_t GetBytes(unsigned char *buf, size_t len) override {
NumStreamFetches++;
return read(Fd, buf, len);
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index a88b18e..eb99242 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -114,9 +114,9 @@ static void debug_user_sig_handler(void *Cookie) {
// know that debug mode is enabled and dbgs() really is a
// circular_raw_ostream. If NDEBUG is defined, then dbgs() ==
// errs() but this will never be invoked.
- llvm::circular_raw_ostream *dbgout =
- static_cast<llvm::circular_raw_ostream *>(&llvm::dbgs());
- dbgout->flushBufferWithBanner();
+ llvm::circular_raw_ostream &dbgout =
+ static_cast<circular_raw_ostream &>(llvm::dbgs());
+ dbgout.flushBufferWithBanner();
}
/// dbgs - Return a circular-buffered debug stream.
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 80d2aef..b8538ff 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -51,8 +51,8 @@ bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const {
///
void FoldingSetNodeID::AddPointer(const void *Ptr) {
// Note: this adds pointers to the hash using sizes and endianness that
- // depend on the host. It doesn't matter however, because hashing on
- // pointer values in inherently unstable. Nothing should depend on the
+ // depend on the host. It doesn't matter, however, because hashing on
+ // pointer values is inherently unstable. Nothing should depend on the
// ordering of nodes in the folding set.
Bits.append(reinterpret_cast<unsigned *>(&Ptr),
reinterpret_cast<unsigned *>(&Ptr+1));
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index fd4ce54..97aedc8 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -92,7 +92,7 @@ static bool ExecGraphViewer(StringRef ExecPath, std::vector<const char *> &args,
errs() << " done. \n";
} else {
sys::ExecuteNoWait(ExecPath, args.data(), nullptr, nullptr, 0, &ErrMsg);
- errs() << "Remember to erase graph file: " << Filename.str() << "\n";
+ errs() << "Remember to erase graph file: " << Filename << "\n";
}
return false;
}
@@ -140,6 +140,29 @@ bool llvm::DisplayGraph(StringRef FilenameRef, bool wait,
std::string ViewerPath;
GraphSession S;
+#ifdef __APPLE__
+ if (S.TryFindProgram("open", ViewerPath)) {
+ std::vector<const char *> args;
+ args.push_back(ViewerPath.c_str());
+ if (wait)
+ args.push_back("-W");
+ args.push_back(Filename.c_str());
+ args.push_back(nullptr);
+ errs() << "Trying 'open' program... ";
+ if (!ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg))
+ return false;
+ }
+#endif
+ if (S.TryFindProgram("xdg-open", ViewerPath)) {
+ std::vector<const char *> args;
+ args.push_back(ViewerPath.c_str());
+ args.push_back(Filename.c_str());
+ args.push_back(nullptr);
+ errs() << "Trying 'xdg-open' program... ";
+ if (!ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg))
+ return false;
+ }
+
// Graphviz
if (S.TryFindProgram("Graphviz", ViewerPath)) {
std::vector<const char *> args;
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 0e9a62e..726961a 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -182,19 +182,21 @@ static bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
#endif
}
-static bool OSHasAVXSupport() {
+static bool GetX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__)
// Check xgetbv; this uses a .byte sequence instead of the instruction
// directly because older assemblers do not include support for xgetbv and
// there is no easy way to conditionally compile based on the assembler used.
- int rEAX, rEDX;
- __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
+ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (*rEAX), "=d" (*rEDX) : "c" (0));
+ return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
- unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+ unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+ *rEAX = Result;
+ *rEDX = Result >> 32;
+ return false;
#else
- int rEAX = 0; // Ensures we return false
+ return true;
#endif
- return (rEAX & 6) == 6;
}
static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
@@ -223,19 +225,30 @@ StringRef sys::getHostCPUName() {
char c[12];
} text;
- GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
-
- unsigned MaxLeaf = EAX;
- bool HasSSE3 = (ECX & 0x1);
- bool HasSSE41 = (ECX & 0x80000);
- // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
+ unsigned MaxLeaf;
+ GetX86CpuIDAndInfo(0, &MaxLeaf, text.u+0, text.u+2, text.u+1);
+
+ bool HasMMX = (EDX >> 23) & 1;
+ bool HasSSE = (EDX >> 25) & 1;
+ bool HasSSE2 = (EDX >> 26) & 1;
+ bool HasSSE3 = (ECX >> 0) & 1;
+ bool HasSSSE3 = (ECX >> 9) & 1;
+ bool HasSSE41 = (ECX >> 19) & 1;
+ bool HasSSE42 = (ECX >> 20) & 1;
+ bool HasMOVBE = (ECX >> 22) & 1;
+ // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
// switch, then we have full AVX support.
const unsigned AVXBits = (1 << 27) | (1 << 28);
- bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport();
- bool HasAVX2 = HasAVX && MaxLeaf >= 0x7 &&
- !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX) &&
- (EBX & 0x20);
+ bool HasAVX = ((ECX & AVXBits) == AVXBits) && !GetX86XCR0(&EAX, &EDX) &&
+ ((EAX & 0x6) == 0x6);
+ bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+ bool HasLeaf7 = MaxLeaf >= 0x7 &&
+ !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+ bool HasADX = HasLeaf7 && ((EBX >> 19) & 1);
+ bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20);
+ bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1);
+
GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
bool HasTBM = (ECX >> 21) & 0x1;
@@ -298,6 +311,8 @@ StringRef sys::getHostCPUName() {
case 9: // Intel Pentium M processor, Intel Celeron M processor model 09.
case 13: // Intel Pentium M processor, Intel Celeron M processor, model
// 0Dh. All processors are manufactured using the 90 nm process.
+ case 21: // Intel EP80579 Integrated Processor and Intel EP80579
+ // Integrated Processor with Intel QuickAssist Technology
return "pentium-m";
case 14: // Intel Core Duo processor, Intel Core Solo processor, model
@@ -313,74 +328,85 @@ StringRef sys::getHostCPUName() {
// manufactured using the 65 nm process
return "core2";
- case 21: // Intel EP80579 Integrated Processor and Intel EP80579
- // Integrated Processor with Intel QuickAssist Technology
- return "i686"; // FIXME: ???
-
case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model
// 17h. All processors are manufactured using the 45 nm process.
//
// 45nm: Penryn , Wolfdale, Yorkfield (XE)
- // Not all Penryn processors support SSE 4.1 (such as the Pentium brand)
- return HasSSE41 ? "penryn" : "core2";
+ case 29: // Intel Xeon processor MP. All processors are manufactured using
+ // the 45 nm process.
+ return "penryn";
case 26: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 45 nm process.
- case 29: // Intel Xeon processor MP. All processors are manufactured using
- // the 45 nm process.
case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
// As found in a Summer 2010 model iMac.
+ case 46: // Nehalem EX
+ return "nehalem";
case 37: // Intel Core i7, laptop version.
case 44: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 32 nm process.
- case 46: // Nehalem EX
case 47: // Westmere EX
- return "corei7";
+ return "westmere";
// SandyBridge:
case 42: // Intel Core i7 processor. All processors are manufactured
// using the 32 nm process.
case 45:
- // Not all Sandy Bridge processors support AVX (such as the Pentium
- // versions instead of the i7 versions).
- return HasAVX ? "corei7-avx" : "corei7";
+ return "sandybridge";
// Ivy Bridge:
case 58:
case 62: // Ivy Bridge EP
- // Not all Ivy Bridge processors support AVX (such as the Pentium
- // versions instead of the i7 versions).
- return HasAVX ? "core-avx-i" : "corei7";
+ return "ivybridge";
// Haswell:
case 60:
case 63:
case 69:
case 70:
- // Not all Haswell processors support AVX2 (such as the Pentium
- // versions instead of the i7 versions).
- return HasAVX2 ? "core-avx2" : "corei7";
+ return "haswell";
// Broadwell:
case 61:
- // Not all Broadwell processors support AVX2 (such as the Pentium
- // versions instead of the i7 versions).
- return HasAVX2 ? "broadwell" : "corei7";
+ return "broadwell";
case 28: // Most 45 nm Intel Atom processors
case 38: // 45 nm Atom Lincroft
case 39: // 32 nm Atom Medfield
case 53: // 32 nm Atom Midview
case 54: // 32 nm Atom Midview
- return "atom";
+ return "bonnell";
// Atom Silvermont codes from the Intel software optimization guide.
case 55:
case 74:
case 77:
- return "slm";
-
- default: return (Em64T) ? "x86-64" : "i686";
+ return "silvermont";
+
+ default: // Unknown family 6 CPU, try to guess.
+ if (HasAVX512)
+ return "knl";
+ if (HasADX)
+ return "broadwell";
+ if (HasAVX2)
+ return "haswell";
+ if (HasAVX)
+ return "sandybridge";
+ if (HasSSE42)
+ return HasMOVBE ? "silvermont" : "nehalem";
+ if (HasSSE41)
+ return "penryn";
+ if (HasSSSE3)
+ return HasMOVBE ? "bonnell" : "core2";
+ if (Em64T)
+ return "x86-64";
+ if (HasSSE2)
+ return "pentium-m";
+ if (HasSSE)
+ return "pentium3";
+ if (HasMMX)
+ return "pentium2";
+ return "pentiumpro";
}
case 15: {
switch (Model) {
@@ -681,7 +707,89 @@ StringRef sys::getHostCPUName() {
}
#endif
-#if defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ unsigned MaxLevel;
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ if (GetX86CpuIDAndInfo(0, &MaxLevel, text.u+0, text.u+2, text.u+1) ||
+ MaxLevel < 1)
+ return false;
+
+ GetX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
+
+ Features["cmov"] = (EDX >> 15) & 1;
+ Features["mmx"] = (EDX >> 23) & 1;
+ Features["sse"] = (EDX >> 25) & 1;
+ Features["sse2"] = (EDX >> 26) & 1;
+ Features["sse3"] = (ECX >> 0) & 1;
+ Features["ssse3"] = (ECX >> 9) & 1;
+ Features["sse4.1"] = (ECX >> 19) & 1;
+ Features["sse4.2"] = (ECX >> 20) & 1;
+
+ Features["pclmul"] = (ECX >> 1) & 1;
+ Features["cx16"] = (ECX >> 13) & 1;
+ Features["movbe"] = (ECX >> 22) & 1;
+ Features["popcnt"] = (ECX >> 23) & 1;
+ Features["aes"] = (ECX >> 25) & 1;
+ Features["rdrnd"] = (ECX >> 30) & 1;
+
+ // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
+ // indicates that the AVX registers will be saved and restored on context
+ // switch, then we have full AVX support.
+ bool HasAVX = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) &&
+ !GetX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6);
+ Features["avx"] = HasAVX;
+ Features["fma"] = HasAVX && (ECX >> 12) & 1;
+ Features["f16c"] = HasAVX && (ECX >> 29) & 1;
+
+ // AVX512 requires additional context to be saved by the OS.
+ bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+
+ unsigned MaxExtLevel;
+ GetX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+ bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
+ !GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1);
+ Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1);
+ Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1);
+ Features["xop"] = HasAVX && HasExtLeaf1 && ((ECX >> 11) & 1);
+ Features["fma4"] = HasAVX && HasExtLeaf1 && ((ECX >> 16) & 1);
+ Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
+
+ bool HasLeaf7 = MaxLevel >= 7 &&
+ !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+
+ // AVX2 is only supported if we have the OS save support from AVX.
+ Features["avx2"] = HasAVX && HasLeaf7 && (EBX >> 5) & 1;
+
+ Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
+ Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
+ Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1);
+ Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
+ Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
+ Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
+ Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
+ Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
+
+ // AVX512 is only supported if the OS supports the context save for it.
+ Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
+ Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
+ Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
+ Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
+ Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
+ Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
+ Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
+
+ return true;
+}
+#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
// Read 1024 bytes from /proc/cpuinfo, which should contain the Features line
// in all cases.
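
The new x86 getHostCPUFeatures above reads CPUID leaves and maps individual register bits to named features. A minimal sketch of the leaf-1 probing using the GCC/Clang <cpuid.h> helper (an assumption: the patch itself rolls its own GetX86CpuIDAndInfo rather than using this header):

#include <cpuid.h> // GCC/Clang helper; x86 hosts only
#include <map>
#include <string>

// Run CPUID with EAX=1 and test individual ECX/EDX bits. The bit positions
// match the table in the patch, e.g. SSE2 is EDX bit 26, SSE4.2 is ECX bit 20.
std::map<std::string, bool> probeLeaf1Features() {
  std::map<std::string, bool> Features;
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  if (!__get_cpuid(1, &EAX, &EBX, &ECX, &EDX))
    return Features; // leaf 1 unavailable: report nothing
  Features["sse2"]   = (EDX >> 26) & 1;
  Features["sse4.2"] = (ECX >> 20) & 1;
  Features["popcnt"] = (ECX >> 23) & 1;
  Features["aes"]    = (ECX >> 25) & 1;
  return Features;
}
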
diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp
index d0c1748..3571cd3 100644
--- a/lib/Support/Process.cpp
+++ b/lib/Support/Process.cpp
@@ -26,24 +26,6 @@ using namespace sys;
//=== independent code.
//===----------------------------------------------------------------------===//
-/// \brief A helper function to compute the elapsed wall-time since the program
-/// started.
-///
-/// Note that this routine actually computes the elapsed wall time since the
-/// first time it was called. However, we arrange to have it called during the
-/// startup of the process to get approximately correct results.
-static TimeValue getElapsedWallTime() {
- static TimeValue &StartTime = *new TimeValue(TimeValue::now());
- return TimeValue::now() - StartTime;
-}
-
-/// \brief A special global variable to ensure we call \c getElapsedWallTime
-/// during global initialization of the program.
-///
-/// Note that this variable is never referenced elsewhere. Doing so could
-/// create race conditions during program startup or shutdown.
-static volatile TimeValue DummyTimeValue = getElapsedWallTime();
-
Optional<std::string> Process::FindInEnvPath(const std::string& EnvName,
const std::string& FileName)
{
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index d3e29ac..e8344ef 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -15,6 +15,7 @@
#include "regex_impl.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include <string>
using namespace llvm;
@@ -158,7 +159,7 @@ std::string Regex::sub(StringRef Repl, StringRef String,
RefValue < Matches.size())
Res += Matches[RefValue];
else if (Error && Error->empty())
- *Error = "invalid backreference string '" + Ref.str() + "'";
+ *Error = ("invalid backreference string '" + Twine(Ref) + "'").str();
break;
}
}
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index d4b150a..5b43ecc 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -92,7 +92,7 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case sparcv9:
case sparc: return "sparc";
- case systemz: return "systemz";
+ case systemz: return "s390";
case x86:
case x86_64: return "x86";
@@ -1111,7 +1111,7 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
.Cases("v7m", "v7-m", "cortex-m3")
.Cases("v7em", "v7e-m", "cortex-m4")
.Cases("v8", "v8a", "v8-a", "cortex-a53")
- .Cases("v8.1a", "v8.1-a", "generic-armv8.1-a")
+ .Cases("v8.1a", "v8.1-a", "generic")
.Default(nullptr);
else
result = llvm::StringSwitch<const char *>(MArch)
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index a9b48e0..057bcab1 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -486,12 +486,12 @@ void llvm::sys::DisableSystemDialogsOnCrash() {}
/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
-void llvm::sys::PrintStackTraceOnErrorSignal() {
+void llvm::sys::PrintStackTraceOnErrorSignal(bool DisableCrashReporting) {
AddSignalHandler(PrintStackTraceSignalHandler, nullptr);
#if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES)
// Environment variable to disable any kind of crash dialog.
- if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
+ if (DisableCrashReporting || getenv("LLVM_DISABLE_CRASH_REPORT")) {
mach_port_t self = mach_task_self();
exception_mask_t mask = EXC_MASK_CRASH;
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index d558ff5..b5523aa 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -261,6 +261,7 @@ std::error_code rename(const Twine &from, const Twine &to) {
MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
return std::error_code();
DWORD LastError = ::GetLastError();
+ ec = windows_error(LastError);
if (LastError != ERROR_ACCESS_DENIED)
break;
// Retry MoveFile() at ACCESS_DENIED.
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index de6bf1c..f070111 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -389,7 +389,7 @@ void sys::DisableSystemDialogsOnCrash() {
/// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or
/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
-void sys::PrintStackTraceOnErrorSignal() {
+void sys::PrintStackTraceOnErrorSignal(bool DisableCrashReporting) {
DisableSystemDialogsOnCrash();
RegisterHandler();
LeaveCriticalSection(&CriticalSection);
diff --git a/lib/Support/Windows/TimeValue.inc b/lib/Support/Windows/TimeValue.inc
index 0223ab4..b90b4f1 100644
--- a/lib/Support/Windows/TimeValue.inc
+++ b/lib/Support/Windows/TimeValue.inc
@@ -47,6 +47,7 @@ std::string TimeValue::str() const {
__time64_t OurTime = this->toEpochTime();
int Error = ::_localtime64_s(&Storage, &OurTime);
assert(!Error);
+ (void)Error;
LT = &Storage;
#endif
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 051e2dd..6f9f910 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -487,51 +487,53 @@ void format_object_base::home() {
// raw_fd_ostream
//===----------------------------------------------------------------------===//
-raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
- sys::fs::OpenFlags Flags)
- : Error(false), UseAtomicWrites(false), pos(0) {
- EC = std::error_code();
+static int getFD(StringRef Filename, std::error_code &EC,
+ sys::fs::OpenFlags Flags) {
// Handle "-" as stdout. Note that when we do this, we consider ourself
// the owner of stdout. This means that we can do things like close the
// file descriptor when we're done and set the "binary" flag globally.
if (Filename == "-") {
- FD = STDOUT_FILENO;
+ EC = std::error_code();
// If user requested binary then put stdout into binary mode if
// possible.
if (!(Flags & sys::fs::F_Text))
sys::ChangeStdoutToBinary();
- // Close stdout when we're done, to detect any output errors.
- ShouldClose = true;
- return;
+ return STDOUT_FILENO;
}
+ int FD;
EC = sys::fs::openFileForWrite(Filename, FD, Flags);
+ if (EC)
+ return -1;
- if (EC) {
- ShouldClose = false;
- return;
- }
-
- // Ok, we successfully opened the file, so it'll need to be closed.
- ShouldClose = true;
+ return FD;
}
-/// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If
-/// ShouldClose is true, this closes the file when the stream is destroyed.
+raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
+ sys::fs::OpenFlags Flags)
+ : raw_fd_ostream(getFD(Filename, EC, Flags), true) {}
+
+/// FD is the file descriptor that this writes to. If ShouldClose is true, this
+/// closes the file when the stream is destroyed.
raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
- : raw_ostream(unbuffered), FD(fd),
- ShouldClose(shouldClose), Error(false), UseAtomicWrites(false) {
-#ifdef O_BINARY
- // Setting STDOUT to binary mode is necessary in Win32
- // to avoid undesirable linefeed conversion.
- // Don't touch STDERR, or w*printf() (in assert()) would barf wide chars.
- if (fd == STDOUT_FILENO)
- setmode(fd, O_BINARY);
-#endif
+ : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose),
+ Error(false), UseAtomicWrites(false) {
+ if (FD < 0) {
+ ShouldClose = false;
+ return;
+ }
// Get the starting position.
off_t loc = ::lseek(FD, 0, SEEK_CUR);
- if (loc == (off_t)-1)
+#ifdef LLVM_ON_WIN32
+ // MSVCRT's _lseek(SEEK_CUR) doesn't return -1 for pipes.
+ sys::fs::file_status Status;
+ std::error_code EC = status(FD, Status);
+ SupportsSeeking = !EC && Status.type() == sys::fs::file_type::regular_file;
+#else
+ SupportsSeeking = loc != (off_t)-1;
+#endif
+ if (!SupportsSeeking)
pos = 0;
else
pos = static_cast<uint64_t>(loc);
@@ -623,11 +625,18 @@ void raw_fd_ostream::close() {
uint64_t raw_fd_ostream::seek(uint64_t off) {
flush();
pos = ::lseek(FD, off, SEEK_SET);
- if (pos != off)
+ if (pos == (uint64_t)-1)
error_detected();
return pos;
}
+void raw_fd_ostream::pwrite(const char *Ptr, size_t Size, uint64_t Offset) {
+ uint64_t Pos = tell();
+ seek(Offset);
+ write(Ptr, Size);
+ seek(Pos);
+}
+
size_t raw_fd_ostream::preferred_buffer_size() const {
#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
// Windows and Minix have no st_blksize.
@@ -708,7 +717,9 @@ raw_ostream &llvm::outs() {
// Set buffer settings to model stdout behavior.
// Delete the file descriptor when the program exits, forcing error
// detection. If you don't want this behavior, don't use outs().
- static raw_fd_ostream S(STDOUT_FILENO, true);
+ std::error_code EC;
+ static raw_fd_ostream S("-", EC, sys::fs::F_None);
+ assert(!EC);
return S;
}
@@ -749,7 +760,14 @@ void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
// capacity. This allows raw_ostream to write directly into the correct place,
// and we only need to set the vector size when the data is flushed.
+raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O, unsigned)
+ : OS(O) {}
+
raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) {
+ init();
+}
+
+void raw_svector_ostream::init() {
// Set up the initial external buffer. We make sure that the buffer has at
// least 128 bytes free; raw_ostream itself only requires 64, but we want to
// make sure that we don't grow the buffer unnecessarily on destruction (when
@@ -763,6 +781,17 @@ raw_svector_ostream::~raw_svector_ostream() {
flush();
}
+void raw_svector_ostream::pwrite(const char *Ptr, size_t Size,
+ uint64_t Offset) {
+ flush();
+
+ uint64_t End = Offset + Size;
+ if (End > OS.size())
+ OS.resize(End);
+
+ memcpy(OS.begin() + Offset, Ptr, Size);
+}
+
/// resync - This is called when the SmallVector we're appending to is changed
/// outside of the raw_svector_ostream's control. It is only safe to do this
/// if the raw_svector_ostream has previously been flushed.
@@ -817,3 +846,5 @@ void raw_null_ostream::write_impl(const char *Ptr, size_t Size) {
uint64_t raw_null_ostream::current_pos() const {
return 0;
}
+
+void raw_null_ostream::pwrite(const char *Ptr, size_t Size, uint64_t Offset) {}
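
The pwrite additions above let object writers patch bytes at an offset that has already been emitted: raw_fd_ostream does it with seek/write/seek-back, raw_svector_ostream with a memcpy into its buffer. A standalone sketch of the same contract using std::fstream as a stand-in:

#include <fstream>
#include <string>

// Overwrite Bytes at an absolute Offset that was already written, then
// restore the put position so normal appending continues afterwards.
void pwriteAt(std::fstream &OS, const std::string &Bytes,
              std::streamoff Offset) {
  std::streampos Saved = OS.tellp(); // where appending will resume
  OS.seekp(Offset);
  OS.write(Bytes.data(), static_cast<std::streamsize>(Bytes.size()));
  OS.seekp(Saved);
}

This is the operation object writers need in order to back-patch headers, section sizes, and relocation counts once the payload length is known.
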
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 4ae9903..8a8f0ee 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -2040,7 +2040,7 @@ RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const {
/// to CurRec's name.
Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
Init *Name, const std::string &Scoper) {
- RecTy *Type = dyn_cast<TypedInit>(Name)->getType();
+ RecTy *Type = cast<TypedInit>(Name)->getType();
BinOpInit *NewName =
BinOpInit::get(BinOpInit::STRCONCAT,
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
index 1f750fc..cbc30be 100644
--- a/lib/TableGen/TGLexer.h
+++ b/lib/TableGen/TGLexer.h
@@ -87,8 +87,7 @@ private:
public:
TGLexer(SourceMgr &SrcMgr);
- ~TGLexer() {}
-
+
tgtok::TokKind Lex() {
return CurCode = LexToken();
}
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index bb3db4b..9a7d6c8 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -32,9 +32,6 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable ARMv8 CRC-32 checksum instructions">;
-def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true",
- "Enable ARMv8.1a extensions", [FeatureCRC]>;
-
/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
@@ -44,6 +41,13 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;
//===----------------------------------------------------------------------===//
+// Architectures.
+//
+
+def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
+ "Support ARM v8.1a instructions", [FeatureCRC]>;
+
+//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -92,10 +96,6 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureNEON,
FeatureCRC]>;
-def : ProcessorModel<"generic-armv8.1-a", NoSchedModel, [FeatureV8_1a,
- FeatureNEON,
- FeatureCrypto]>;
-
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
@@ -123,12 +123,14 @@ def AppleAsmParserVariant : AsmParserVariant {
// AsmWriter bits get associated with the correct class.
def GenericAsmWriter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
int Variant = 0;
bit isMCAsmWriter = 1;
}
def AppleAsmWriter : AsmWriter {
let AsmWriterClassName = "AppleInstPrinter";
+ int PassSubtarget = 1;
int Variant = 1;
int isMCAsmWriter = 1;
}
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 1b4483a..0821cff 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -131,29 +131,6 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
SM.serializeToStackMapSection();
}
-
- // Emit a .data.rel section containing any stubs that were created.
- if (TT.isOSBinFormatELF()) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(0));
- }
- Stubs.clear();
- }
- }
-
}
MachineLocation
@@ -371,8 +348,8 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
assert(NOps == 4);
OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
// cast away const; DIetc do not take const operands for some reason.
- DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata()));
- OS << V.getName();
+ OS << cast<MDLocalVariable>(MI->getOperand(NOps - 2).getMetadata())
+ ->getName();
OS << " <- ";
// Frame address. Currently handles register +- offset only.
assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 568f258..efdb2e3 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -328,7 +328,7 @@ static void initReachingDef(const MachineFunction &MF,
const uint32_t *PreservedRegs = MO.getRegMask();
// Set generated regs.
- for (const auto Entry : RegToId) {
+ for (const auto &Entry : RegToId) {
unsigned Reg = Entry.second;
// Use the global register ID when querying APIs external to this
// pass.
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 41b1132..c2470f7 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -698,12 +698,15 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return expandMOVImm(MBB, MBBI, 32);
case AArch64::MOVi64imm:
return expandMOVImm(MBB, MBBI, 64);
- case AArch64::RET_ReallyLR:
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
- .addReg(AArch64::LR);
+ case AArch64::RET_ReallyLR: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
+ .addReg(AArch64::LR);
+ transferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
}
+ }
return false;
}
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 99cb641..c3f6859 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -1917,7 +1917,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
// could select it. Emit a copy to subreg if necessary. FastISel will remove
// it when it selects the integer extend.
unsigned Reg = lookUpRegForValue(IntExtVal);
- if (!Reg) {
+ auto *MI = MRI.getUniqueVRegDef(Reg);
+ if (!MI) {
if (RetVT == MVT::i64 && VT <= MVT::i32) {
if (WantZExt) {
// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
@@ -1935,10 +1936,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
// The integer extend has already been emitted - delete all the instructions
// that have been emitted by the integer extend lowering code and use the
// result from the load instruction directly.
- while (Reg) {
- auto *MI = MRI.getUniqueVRegDef(Reg);
- if (!MI)
- break;
+ while (MI) {
Reg = 0;
for (auto &Opnd : MI->uses()) {
if (Opnd.isReg()) {
@@ -1947,6 +1945,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
}
}
MI->eraseFromParent();
+ MI = nullptr;
+ if (Reg)
+ MI = MRI.getUniqueVRegDef(Reg);
}
updateValueMap(IntExtVal, ResultReg);
return true;
@@ -3034,6 +3035,11 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
// Copy all of the result registers out of their specified physreg.
MVT CopyVT = RVLocs[0].getValVT();
+
+ // TODO: Handle big-endian results
+ if (CopyVT.isVector() && !Subtarget->isLittleEndian())
+ return false;
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 84bf317..bd2af16 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -9,6 +9,82 @@
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
+// On AArch64, stack frames are structured as follows:
+//
+// The stack grows downward.
+//
+// All of the individual frame areas on the frame below are optional, i.e. it's
+// possible to create a function so that the particular area isn't present
+// in the frame.
+//
+// At function entry, the "frame" looks as follows:
+//
+// | | Higher address
+// |-----------------------------------|
+// | |
+// | arguments passed on the stack |
+// | |
+// |-----------------------------------| <- sp
+// | | Lower address
+//
+//
+// After the prologue has run, the frame has the following general structure.
+// Note that this doesn't depict the case where a red-zone is used. Also,
+// technically the last frame area (VLAs) doesn't get created until in the
+// main function body, after the prologue is run. However, it's depicted here
+// for completeness.
+//
+// | | Higher address
+// |-----------------------------------|
+// | |
+// | arguments passed on the stack |
+// | |
+// |-----------------------------------|
+// | |
+// | prev_fp, prev_lr |
+// | (a.k.a. "frame record") |
+// |-----------------------------------| <- fp(=x29)
+// | |
+// | other callee-saved registers |
+// | |
+// |-----------------------------------|
+// |.empty.space.to.make.part.below....|
+// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
+// |.the.standard.16-byte.alignment....| compile time; if present)
+// |-----------------------------------|
+// | |
+// | local variables of fixed size |
+// | including spill slots |
+// |-----------------------------------| <- bp(not defined by ABI,
+// |.variable-sized.local.variables....| LLVM chooses X19)
+// |.(VLAs)............................| (size of this area is unknown at
+// |...................................| compile time)
+// |-----------------------------------| <- sp
+// | | Lower address
+//
+//
+// To access the data in a frame, a constant offset from one of the pointers
+// (fp, bp, sp) must be computable at compile time. The sizes of the areas
+// with a dotted background cannot be computed at compile time if those areas
+// are present, so all three of fp, bp and sp must be set up in order to
+// reach every part of the frame, assuming all of the frame areas are
+// non-empty.
+//
+// For most functions, some of the frame areas are empty. For those functions,
+// it may not be necessary to set up fp or bp:
+// * A base pointer is definitely needed when there are both VLAs and local
+// variables with more-than-default alignment requirements.
+// * A frame pointer is definitely needed when there are local variables with
+// more-than-default alignment requirements.
+//
+// In some cases when a base pointer is not strictly needed, it is generated
+// anyway when offsets from the frame pointer to access local variables become
+// so large that the offset can't be encoded in the immediate fields of loads
+// or stores.
+//
+// FIXME: also explain the redzone concept.
+// FIXME: also explain the concept of reserved call frames.
+//
//===----------------------------------------------------------------------===//
#include "AArch64FrameLowering.h"
@@ -39,26 +115,6 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- int Offset = 0;
- for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -FFI->getObjectOffset(i);
- if (FixedOff > Offset)
- Offset = FixedOff;
- }
- for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
- Offset += FFI->getObjectSize(i);
- unsigned Align = FFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset + Align - 1) / Align * Align;
- }
- // This does not include the 16 bytes used for fp and lr.
- return (unsigned)Offset;
-}
-
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
@@ -83,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-#ifndef NDEBUG
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- assert(!RegInfo->needsStackRealignment(MF) &&
- "No stack realignment on AArch64!");
-#endif
-
return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
- MFI->hasPatchPoint());
+ MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF));
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -288,11 +338,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setLocalStackSize(NumBytes);
// Allocate space for the rest of the frame.
- if (NumBytes) {
- // If we're a leaf function, try using the red zone.
- if (!canUseRedZone(MF))
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+
+ const unsigned Alignment = MFI->getMaxAlignment();
+ const bool NeedsRealignment = (Alignment > 16);
+ unsigned scratchSPReg = AArch64::SP;
+ if (NeedsRealignment) {
+ // Use the first callee-saved register as a scratch register
+ assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) &&
+ "No scratch register to align SP!");
+ scratchSPReg = AArch64::X9;
+ }
+
+ // If we're a leaf function, try using the red zone.
+ if (NumBytes && !canUseRedZone(MF))
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
+ MachineInstr::FrameSetup);
+
+ assert(!(NeedsRealignment && NumBytes==0) &&
+ "NumBytes should never be 0 when realignment is needed");
+
+ if (NumBytes && NeedsRealignment) {
+ const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+ assert(NrBitsToZero > 1);
+ assert(scratchSPReg != AArch64::SP);
+
+ // SUB X9, SP, NumBytes
+ // -- X9 is a temporary register, so it shouldn't contain any live data here,
+ // -- free to use. This is already produced by emitFrameOffset above.
+ // AND SP, X9, 0b11111...0000
+ // The logical immediates have a non-trivial encoding. The following
+ // formula computes the encoded immediate with all ones but
+ // NrBitsToZero zero bits as least significant bits.
+ uint32_t andMaskEncoded =
+ (1 <<12) // = N
+ | ((64-NrBitsToZero) << 6) // immr
+ | ((64-NrBitsToZero-1) << 0) // imms
+ ;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(scratchSPReg, RegState::Kill)
+ .addImm(andMaskEncoded);
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -302,15 +389,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
// FIXME: Clarify FrameSetup flags here.
// Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
// needed.
- //
- if (RegInfo->hasBasePointer(MF))
- TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
+ if (RegInfo->hasBasePointer(MF)) {
+ TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
+ false);
+ }
if (needsFrameMoves) {
const DataLayout *TD = MF.getTarget().getDataLayout();
const int StackGrowth = -TD->getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
-
// An example of the prologue:
//
// .globl __foo
@@ -460,7 +547,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
- // Initial and residual are named for consitency with the prologue. Note that
+ // Initial and residual are named for consistency with the prologue. Note that
// in the epilogue, the residual adjustment is executed first.
uint64_t ArgumentPopSize = 0;
if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
@@ -571,9 +658,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
bool isFixed = MFI->isFixedObjectIndex(FI);
// Use frame pointer to reference fixed objects. Use it for locals if
- // there are VLAs (and thus the SP isn't reliable as a base).
- // Make sure useFPForScavengingIndex() does the right thing for the emergency
- // spill slot.
+ // there are VLAs or a dynamically realigned SP (and thus the SP isn't
+ // reliable as a base). Make sure useFPForScavengingIndex() does the
+ // right thing for the emergency spill slot.
bool UseFP = false;
if (AFI->hasStackFrame()) {
// Note: Keeping the following as multiple 'if' statements rather than
@@ -582,7 +669,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// Argument access should always use the FP.
if (isFixed) {
UseFP = hasFP(MF);
- } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
+ } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
+ !RegInfo->needsStackRealignment(MF)) {
// Use SP or FP, whichever gives us the best chance of the offset
// being in range for direct access. If the FPOffset is positive,
// that'll always be best, as the SP will be even further away.
@@ -598,6 +686,10 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
}
}
+ assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
+ "In the presence of dynamic stack pointer realignment, "
+ "non-argument objects cannot be accessed through the frame pointer");
+
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
@@ -695,6 +787,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
MIB.addReg(AArch64::SP, RegState::Define);
+ MBB.addLiveIn(Reg1);
+ MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
@@ -794,6 +888,9 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
if (RegInfo->hasBasePointer(MF))
MRI->setPhysRegUsed(RegInfo->getBaseRegister());
+ if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
+ MRI->setPhysRegUsed(AArch64::X9);
+
// If any callee-saved registers are used, the frame cannot be eliminated.
unsigned NumGPRSpilled = 0;
unsigned NumFPRSpilled = 0;
@@ -867,7 +964,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+ unsigned CFSize =
+ MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
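As an aside (editorial sketch, not from the commit), the realignment sequence added to emitPrologue above hand-encodes the logical immediate for the AND that re-aligns SP. The following stand-alone check reproduces that arithmetic for a requested alignment of 32 bytes, using the same N/immr/imms formula as the hunk:

    #include <cassert>
    #include <cstdint>

    // For Alignment = 32, countTrailingZeros(32) == 5, so the mask clears the
    // five low bits of SP (i.e. AND SP, X9, #0xFFFFFFFFFFFFFFE0).
    int main() {
      const unsigned NrBitsToZero = 5;
      uint32_t andMaskEncoded = (1 << 12)                       // N
                              | ((64 - NrBitsToZero) << 6)      // immr = 59
                              | ((64 - NrBitsToZero - 1) << 0); // imms = 58
      assert(andMaskEncoded == 0x1EFA);                         // 4096 + 3776 + 58
      return 0;
    }

The asserted value is the editor's own arithmetic; the prologue code never materialises it as a named constant, it simply feeds the expression to the ANDXri builder.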
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index df3875f..1439bf3 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -22,7 +22,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
: TargetFrameLowering(StackGrowsDown, 16, 0, 16,
- false /*StackRealignable*/) {}
+ true /*StackRealignable*/) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 0a47dcb..f75700d 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -848,7 +848,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
// MOV X0, WideImmediate
// LDR X2, [BaseReg, X0]
if (isa<ConstantSDNode>(RHS)) {
- int64_t ImmOff = (int64_t)dyn_cast<ConstantSDNode>(RHS)->getZExtValue();
+ int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
unsigned Scale = Log2_32(Size);
// Skip if the immediate can be selected by the load/store addressing mode.
// Also skip the immediate can be encoded by a single ADD (SUB is also
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0c0e856..90a5e5e 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -281,14 +281,39 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
- // f16 is storage-only, so we promote operations to f32 if we know this is
- // valid, and ignore them otherwise. The operations not mentioned here will
- // fail to select, but this is not a major problem as no source language
- // should be emitting native f16 operations yet.
- setOperationAction(ISD::FADD, MVT::f16, Promote);
- setOperationAction(ISD::FDIV, MVT::f16, Promote);
- setOperationAction(ISD::FMUL, MVT::f16, Promote);
- setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ // f16 is a storage-only type, always promote it to f32.
+ setOperationAction(ISD::SETCC, MVT::f16, Promote);
+ setOperationAction(ISD::BR_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT, MVT::f16, Promote);
+ setOperationAction(ISD::FADD, MVT::f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FMA, MVT::f16, Promote);
+ setOperationAction(ISD::FNEG, MVT::f16, Promote);
+ setOperationAction(ISD::FABS, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSQRT, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
// v4f16 is also a storage-only type, so promote it to v4f32 when that is
// known to be safe.
@@ -481,6 +506,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Enable TBZ/TBNZ
MaskAndBranchFoldingIsLegal = true;
+ EnableExtLdPromotion = true;
setMinFunctionAlignment(2);
@@ -1557,6 +1583,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
if (Op.getOperand(0).getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
+ // f16 conversions are promoted to f32.
+ if (Op.getOperand(0).getValueType() == MVT::f16) {
+ SDLoc dl(Op);
+ return DAG.getNode(
+ Op.getOpcode(), dl, Op.getValueType(),
+ DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
+ }
+
if (Op.getOperand(0).getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
@@ -1606,6 +1640,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
+ // f16 conversions are promoted to f32.
+ if (Op.getValueType() == MVT::f16) {
+ SDLoc dl(Op);
+ return DAG.getNode(
+ ISD::FP_ROUND, dl, MVT::f16,
+ DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
+ DAG.getIntPtrConstant(0));
+ }
+
// i128 conversions are libcalls.
if (Op.getOperand(0).getValueType() == MVT::i128)
return SDValue();
@@ -2701,8 +2744,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64);
SDValue Cpy = DAG.getMemcpy(
Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
- /*isVol = */ false,
- /*AlwaysInline = */ false, DstInfo, MachinePointerInfo());
+ /*isVol = */ false, /*AlwaysInline = */ false,
+ /*isTailCall = */ false,
+ DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
@@ -3514,49 +3558,10 @@ static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
}
-SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue CC = Op->getOperand(0);
- SDValue TVal = Op->getOperand(1);
- SDValue FVal = Op->getOperand(2);
- SDLoc DL(Op);
-
- unsigned Opc = CC.getOpcode();
- // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
- // instruction.
- if (CC.getResNo() == 1 &&
- (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
- // Only lower legal XALUO ops.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
- return SDValue();
-
- AArch64CC::CondCode OFCC;
- SDValue Value, Overflow;
- std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG);
- SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
-
- return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
- CCVal, Overflow);
- }
-
- if (CC.getOpcode() == ISD::SETCC)
- return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
- cast<CondCodeSDNode>(CC.getOperand(2))->get());
- else
- return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
- FVal, ISD::SETNE);
-}
-
-SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
+ SDValue RHS, SDValue TVal,
+ SDValue FVal, SDLoc dl,
SelectionDAG &DAG) const {
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue TVal = Op.getOperand(2);
- SDValue FVal = Op.getOperand(3);
- SDLoc dl(Op);
-
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
if (LHS.getValueType() == MVT::f128) {
@@ -3664,14 +3669,14 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
- EVT VT = Op.getValueType();
+ EVT VT = TVal.getValueType();
return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
assert(LHS.getValueType() == RHS.getValueType());
- EVT VT = Op.getValueType();
+ EVT VT = TVal.getValueType();
// Try to match this select into a max/min operation, which have dedicated
// opcode in the instruction set.
@@ -3732,6 +3737,58 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
return CS1;
}
+SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TVal = Op.getOperand(2);
+ SDValue FVal = Op.getOperand(3);
+ SDLoc DL(Op);
+ return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
+SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue CCVal = Op->getOperand(0);
+ SDValue TVal = Op->getOperand(1);
+ SDValue FVal = Op->getOperand(2);
+ SDLoc DL(Op);
+
+ unsigned Opc = CCVal.getOpcode();
+ // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
+ // instruction.
+ if (CCVal.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+ // Only lower legal XALUO ops.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
+ return SDValue();
+
+ AArch64CC::CondCode OFCC;
+ SDValue Value, Overflow;
+ std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
+ SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
+
+ return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
+ CCVal, Overflow);
+ }
+
+ // Lower it the same way as we would lower a SELECT_CC node.
+ ISD::CondCode CC;
+ SDValue LHS, RHS;
+ if (CCVal.getOpcode() == ISD::SETCC) {
+ LHS = CCVal.getOperand(0);
+ RHS = CCVal.getOperand(1);
+ CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
+ } else {
+ LHS = CCVal;
+ RHS = DAG.getConstant(0, CCVal.getValueType());
+ CC = ISD::SETNE;
+ }
+ return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries as PC relative offsets. No additional tweaking
@@ -3920,7 +3977,7 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1),
Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32),
- 8, false, false, MachinePointerInfo(DestSV),
+ 8, false, false, false, MachinePointerInfo(DestSV),
MachinePointerInfo(SrcSV));
}
@@ -4989,7 +5046,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
unsigned Opcode;
if (EltTy == MVT::i8)
Opcode = AArch64ISD::DUPLANE8;
- else if (EltTy == MVT::i16)
+ else if (EltTy == MVT::i16 || EltTy == MVT::f16)
Opcode = AArch64ISD::DUPLANE16;
else if (EltTy == MVT::i32 || EltTy == MVT::f32)
Opcode = AArch64ISD::DUPLANE32;
@@ -6554,6 +6611,59 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
VT1.getSizeInBits() <= 32);
}
+bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
+ if (isa<FPExtInst>(Ext))
+ return false;
+
+ // Vector types are not free.
+ if (Ext->getType()->isVectorTy())
+ return false;
+
+ for (const Use &U : Ext->uses()) {
+ // The extension is free if we can fold it with a left shift in an
+ // addressing mode or an arithmetic operation: add, sub, and cmp.
+
+ // Is there a shift?
+ const Instruction *Instr = cast<Instruction>(U.getUser());
+
+ // Is this a constant shift?
+ switch (Instr->getOpcode()) {
+ case Instruction::Shl:
+ if (!isa<ConstantInt>(Instr->getOperand(1)))
+ return false;
+ break;
+ case Instruction::GetElementPtr: {
+ gep_type_iterator GTI = gep_type_begin(Instr);
+ std::advance(GTI, U.getOperandNo());
+ Type *IdxTy = *GTI;
+ // This extension will end up with a shift because of the scaling factor.
+ // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
+ // Get the shift amount based on the scaling factor:
+ // log2(sizeof(IdxTy)) - log2(8).
+ uint64_t ShiftAmt =
+ countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3;
+ // Is the constant foldable in the shift of the addressing mode?
+ // I.e., shift amount is between 1 and 4 inclusive.
+ if (ShiftAmt == 0 || ShiftAmt > 4)
+ return false;
+ break;
+ }
+ case Instruction::Trunc:
+ // Check if this is a noop.
+ // trunc(sext ty1 to ty2) to ty1.
+ if (Instr->getType() == Ext->getOperand(0)->getType())
+ continue;
+ // FALL THROUGH.
+ default:
+ return false;
+ }
+
+ // At this point we can use the bfm family, so this extension is free
+ // for that use.
+ }
+ return true;
+}
+
bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
unsigned &RequiredAligment) const {
if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
@@ -6597,7 +6707,17 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
(allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
return MVT::f128;
- return Size >= 8 ? MVT::i64 : MVT::i32;
+ if (Size >= 8 &&
+ (memOpAlign(SrcAlign, DstAlign, 8) ||
+ (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
+ return MVT::i64;
+
+ if (Size >= 4 &&
+ (memOpAlign(SrcAlign, DstAlign, 4) ||
+ (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
+ return MVT::i32;
+
+ return MVT::Other;
}
// 12-bit optionally shifted immediates are legal for adds.
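Editorial sketch, not from the commit: the GetElementPtr case in the new isExtFreeImpl above only treats the extension as free when the index's scaling factor implies a shift of 1 to 4, i.e. log2(store size in bits) - 3. A quick stand-alone check of that arithmetic, using a compiler builtin in place of llvm::countTrailingZeros:

    #include <cassert>
    #include <cstdint>

    // Shift amount implied by scaling a GEP index of the given store size in
    // bits, matching the comment in isExtFreeImpl: log2(sizeof(IdxTy)) - log2(8).
    static unsigned impliedShift(uint64_t StoreSizeInBits) {
      return static_cast<unsigned>(__builtin_ctzll(StoreSizeInBits)) - 3;
    }

    int main() {
      assert(impliedShift(8) == 0);    // i8: scale 1, shift 0 -> not free
      assert(impliedShift(32) == 2);   // i32: scale 4 -> foldable (1..4)
      assert(impliedShift(64) == 3);   // i64: scale 8 -> foldable
      assert(impliedShift(128) == 4);  // i128: scale 16 -> still foldable
      return 0;
    }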
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 5ff11e8..820613b 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -355,6 +355,8 @@ public:
getPreferredVectorAction(EVT VT) const override;
private:
+ bool isExtFreeImpl(const Instruction *Ext) const override;
+
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget;
@@ -418,6 +420,9 @@ private:
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
+ SDValue TVal, SDValue FVal, SDLoc dl,
+ SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index d295c02..0c0efaf 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1637,10 +1637,16 @@ multiclass AddSub<bit isSub, string mnemonic,
SDPatternOperator OpNode = null_frag> {
let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// Add/Subtract immediate
+ // Increase the weight of the immediate variant to try to match it before
+ // the extended register variant.
+ // We used to match the register variant before the immediate when the
+ // register argument could be implicitly zero-extended.
+ let AddedComplexity = 6 in
def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
mnemonic, OpNode> {
let Inst{31} = 0;
}
+ let AddedComplexity = 6 in
def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
mnemonic, OpNode> {
let Inst{31} = 1;
@@ -3282,6 +3288,10 @@ class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0,
: BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> {
bits<5> Rt;
bits<5> Rn;
+ let Inst{20-16} = 0b11111;
+ let Unpredictable{20-16} = 0b11111;
+ let Inst{14-10} = 0b11111;
+ let Unpredictable{14-10} = 0b11111;
let Inst{9-5} = Rn;
let Inst{4-0} = Rt;
@@ -5298,6 +5308,27 @@ class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
+ dag oops, dag iops, string asm,
+ list<dag> pattern>
+ : I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21} = R;
+ let Inst{20-16} = Rm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
@@ -5325,6 +5356,16 @@ multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
}
+multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst),
+ (ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm),
+ asm, []>;
+ def v1i16: BaseSIMDThreeScalarTied<U, 0b01, R, opc, (outs FPR16:$dst),
+ (ins FPR16:$Rd, FPR16:$Rn, FPR16:$Rm),
+ asm, []>;
+}
+
multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
@@ -5885,7 +5926,7 @@ multiclass SIMDIns {
let Inst{20-18} = idx;
let Inst{17-16} = 0b10;
let Inst{14-12} = idx2;
- let Inst{11} = 0;
+ let Inst{11} = {?};
}
def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> {
bits<2> idx;
@@ -5893,7 +5934,7 @@ multiclass SIMDIns {
let Inst{20-19} = idx;
let Inst{18-16} = 0b100;
let Inst{14-13} = idx2;
- let Inst{12-11} = 0;
+ let Inst{12-11} = {?,?};
}
def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> {
bits<1> idx;
@@ -5901,7 +5942,7 @@ multiclass SIMDIns {
let Inst{20} = idx;
let Inst{19-16} = 0b1000;
let Inst{14} = idx2;
- let Inst{13-11} = 0;
+ let Inst{13-11} = {?,?,?};
}
// For all forms of the INS instruction, the "mov" mnemonic is the
@@ -8517,6 +8558,174 @@ multiclass SIMDLdSt4SingleAliases<string asm> {
} // end of 'let Predicates = [HasNEON]'
//----------------------------------------------------------------------------
+// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract
+//----------------------------------------------------------------------------
+
+let Predicates = [HasNEON, HasV8_1a] in {
+
+class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand regtype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind,
+ pattern> {
+ let Inst{21}=0;
+}
+multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h",
+ [(set (v4i16 V64:$dst),
+ (Accum (v4i16 V64:$Rd),
+ (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn),
+ (v4i16 V64:$Rm)))))]>;
+ def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h",
+ [(set (v8i16 V128:$dst),
+ (Accum (v8i16 V128:$Rd),
+ (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn),
+ (v8i16 V128:$Rm)))))]>;
+ def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s",
+ [(set (v2i32 V64:$dst),
+ (Accum (v2i32 V64:$Rd),
+ (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn),
+ (v2i32 V64:$Rm)))))]>;
+ def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn),
+ (v4i32 V128:$Rm)))))]>;
+}
+
+multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
+ V64, V64, V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4i16 V64:$dst),
+ (Accum (v4i16 V64:$Rd),
+ (v4i16 (int_aarch64_neon_sqrdmulh
+ (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
+ V128, V128, V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8i16 V128:$dst),
+ (Accum (v8i16 V128:$Rd),
+ (v8i16 (int_aarch64_neon_sqrdmulh
+ (v8i16 V128:$Rn),
+ (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
+ V64, V64, V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2i32 V64:$dst),
+ (Accum (v2i32 V64:$Rd),
+ (v2i32 (int_aarch64_neon_sqrdmulh
+ (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ // FIXME: it would be nice to use the scalar (v1i32) instruction here, but
+ // an intermediate EXTRACT_SUBREG would be untyped.
+ // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we
+ // got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..)))
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
+ (v4i32 (insert_subvector
+ (undef),
+ (v2i32 (int_aarch64_neon_sqrdmulh
+ (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32
+ (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))),
+ (i32 0))),
+ (i64 0))))),
+ (EXTRACT_SUBREG
+ (v2i32 (!cast<Instruction>(NAME # v2i32_indexed)
+ (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ FPR32Op:$Rd,
+ ssub)),
+ V64:$Rn,
+ V128:$Rm,
+ VectorIndexS:$idx)),
+ ssub)>;
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128, V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqrdmulh
+ (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ // FIXME: it would be nice to use the scalar (v1i32) instruction here, but
+ // an intermediate EXTRACT_SUBREG would be untyped.
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
+ (v4i32 (int_aarch64_neon_sqrdmulh
+ (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32
+ (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))),
+ (i64 0))))),
+ (EXTRACT_SUBREG
+ (v4i32 (!cast<Instruction>(NAME # v4i32_indexed)
+ (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ FPR32Op:$Rd,
+ ssub)),
+ V128:$Rn,
+ V128:$Rm,
+ VectorIndexS:$idx)),
+ ssub)>;
+
+ def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
+ FPR16Op, FPR16Op, V128_lo,
+ VectorIndexH, asm, ".h", "", "", ".h",
+ []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
+ FPR32Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s",
+ [(set (i32 FPR32Op:$dst),
+ (Accum (i32 FPR32Op:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh
+ (i32 FPR32Op:$Rn),
+ (i32 (vector_extract (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+} // let Predicates = [HasNeon, HasV8_1a]
+
+//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 8e0af2d..db231c4 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1526,7 +1526,7 @@ void AArch64InstrInfo::copyPhysRegTuple(
}
for (; SubReg != End; SubReg += Incr) {
- const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode));
+ const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
@@ -1904,7 +1904,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI);
@@ -2002,7 +2002,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
.addReg(DestReg, getDefRegState(true))
.addFrameIndex(FI);
if (Offset)
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index ec6fa5c..f7db50a 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -14,6 +14,8 @@
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
+def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
+ AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
@@ -22,8 +24,6 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
-def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">,
- AssemblerPredicate<"FeatureV8_1a", "v8.1a">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsCyclone : Predicate<"Subtarget->isCyclone()">;
@@ -2314,6 +2314,20 @@ def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
+let Predicates = [HasV8_1a] in {
+ // v8.1a "Limited Order Region" extension load-acquire instructions
+ def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
+ def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
+ def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
+ def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
+
+ // v8.1a "Limited Order Region" extension store-release instructions
+ def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">;
+ def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">;
+ def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
+ def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
+}
+
//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//
@@ -2778,6 +2792,10 @@ defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
+defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
+ int_aarch64_neon_sqadd>;
+defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
+ int_aarch64_neon_sqsub>;
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
@@ -2994,6 +3012,20 @@ defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
+let Predicates = [HasV8_1a] in {
+ defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
+ defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
+ def : Pat<(i32 (int_aarch64_neon_sqadd
+ (i32 FPR32:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(i32 (int_aarch64_neon_sqsub
+ (i32 FPR32:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+}
def : InstAlias<"cmls $dst, $src1, $src2",
(CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
@@ -3478,13 +3510,13 @@ def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------
-def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>;
-def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>;
-def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>;
-def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>;
-def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>;
-def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>;
-def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>;
+def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
+def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
+def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
+def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
+def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
+def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
+def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;
def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
@@ -4324,6 +4356,10 @@ defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
int_aarch64_neon_sqsub>;
+defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
+ int_aarch64_neon_sqadd>;
+defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
+ int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 33c11fe..1836682 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -165,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// large enough that referencing from the FP won't result in things being
// in range relatively often, we can use a base pointer to allow access
// from the other direction like the SP normally works.
+ // Furthermore, if variable-sized objects are present and the stack also
+ // needs to be dynamically re-aligned, the base pointer is the only
+ // reliable way to reference the locals.
if (MFI->hasVarSizedObjects()) {
+ if (needsStackRealignment(MF))
+ return true;
// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
@@ -181,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return false;
}
+bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const {
+
+ if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ return false;
+
+ return true;
+}
+
+// FIXME: share this with other backends with identical implementation?
+bool
+AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget()
+ .getSubtargetImpl(*MF.getFunction())
+ ->getFrameLowering()
+ ->getStackAlignment();
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
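For illustration (editorial sketch, not from the patch), a function that exercises the new paths in hasBasePointer and needsStackRealignment above: an over-aligned local pushes the frame's maximum alignment past the 16-byte stack alignment, and a dynamic allocation gives the frame variable-sized objects, so the base pointer becomes the only reliable anchor for the fixed-size locals. __builtin_alloca stands in for a VLA to keep the snippet valid C++:

    #include <cstring>

    // Over-aligned local -> getMaxAlignment() > 16 -> needsStackRealignment().
    // Dynamic allocation -> hasVarSizedObjects()   -> base pointer required.
    void needsBasePointer(unsigned N) {
      alignas(64) char Aligned[256];
      char *Dyn = static_cast<char *>(__builtin_alloca(N));
      std::memset(Aligned, 0, sizeof(Aligned));
      std::memset(Dyn, 0, N);
    }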
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
index c01bfa5..8c379d9 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -93,6 +93,9 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
+ // Base pointer (stack realignment) support.
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
index 3ec4157..ca4457a 100644
--- a/lib/Target/AArch64/AArch64SchedA57.td
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -60,7 +60,12 @@ include "AArch64SchedA57WriteRes.td"
// Cortex-A57. The Cortex-A57 types are directly associated with resources, so
// defining the aliases precludes the need for mapping them using WriteRes. The
// aliases are sufficient for creating a coarse, working model. As the model
-// evolves, InstRWs will be used to override these SchedAliases.
+// evolves, InstRWs will be used to override some of these SchedAliases.
+//
+// WARNING: Using SchedAliases is convenient and works well for latency and
+// resource lookup for instructions. However, this creates an entry in
+// AArch64WriteLatencyTable with a WriteResourceID of 0, breaking
+// any SchedReadAdvance since the lookup will fail.
def : SchedAlias<WriteImm, A57Write_1cyc_1I>;
def : SchedAlias<WriteI, A57Write_1cyc_1I>;
@@ -70,8 +75,8 @@ def : SchedAlias<WriteExtr, A57Write_1cyc_1I>;
def : SchedAlias<WriteIS, A57Write_1cyc_1I>;
def : SchedAlias<WriteID32, A57Write_19cyc_1M>;
def : SchedAlias<WriteID64, A57Write_35cyc_1M>;
-def : SchedAlias<WriteIM32, A57Write_3cyc_1M>;
-def : SchedAlias<WriteIM64, A57Write_5cyc_1M>;
+def : WriteRes<WriteIM32, [A57UnitM]> { let Latency = 3; }
+def : WriteRes<WriteIM64, [A57UnitM]> { let Latency = 5; }
def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>;
def : SchedAlias<WriteLD, A57Write_4cyc_1L>;
@@ -127,6 +132,15 @@ def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>;
def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>;
+// Shifted Register with Shift == 0
+// ----------------------------------------------------------------------------
+
+def A57WriteISReg : SchedWriteVariant<[
+ SchedVar<RegShiftedPred, [WriteISReg]>,
+ SchedVar<NoSchedPred, [WriteI]>]>;
+def : InstRW<[A57WriteISReg], (instregex ".*rs$")>;
+
+
// Divide and Multiply Instructions
// -----------------------------------------------------------------------------
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 221d70d..0b97af8 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -47,8 +47,9 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
+ HasV8_1aOps(false),
HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
- HasV8_1a(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
+ HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS)),
TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index bcab97d..5454b20 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -37,11 +37,12 @@ protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily;
+ bool HasV8_1aOps;
+
bool HasFPARMv8;
bool HasNEON;
bool HasCrypto;
bool HasCRC;
- bool HasV8_1a;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove;
@@ -93,6 +94,8 @@ public:
return isCortexA53() || isCortexA57();
}
+ bool hasV8_1aOps() const { return HasV8_1aOps; }
+
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
@@ -101,7 +104,6 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
- bool hasV8_1a() const { return HasV8_1a; }
bool isLittleEndian() const { return IsLittle; }
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index f902f64..ab28a16 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -87,6 +87,11 @@ EnableGEPOpt("aarch64-gep-opt", cl::Hidden,
cl::desc("Enable optimizations on complex GEPs"),
cl::init(true));
+// FIXME: Unify control over GlobalMerge.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalMerge("aarch64-global-merge", cl::Hidden,
+ cl::desc("Enable the global merge pass"));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
@@ -245,7 +250,9 @@ bool AArch64PassConfig::addPreISel() {
// FIXME: On AArch64, this depends on the type.
// Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
// and the offset has to be a multiple of the related size in bytes.
- if (TM->getOptLevel() == CodeGenOpt::Aggressive)
+ if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
+ EnableGlobalMerge == cl::BOU_UNSET) ||
+ EnableGlobalMerge == cl::BOU_TRUE)
addPass(createGlobalMergePass(TM, 4095));
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createAArch64AddressTypePromotionPass());
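[Note] The new aarch64-global-merge option is a three-state switch: left unset, the GlobalMerge pass still runs only at -O3 as before, while an explicit true or false overrides that default in either direction. A minimal sketch of the same gating logic with a plain three-state enum (cl::boolOrDefault plays this role in LLVM; the function and enum names below are illustrative):

    #include <cstdio>

    // Mirrors cl::boolOrDefault: unset / explicitly true / explicitly false.
    enum class BoolOrDefault { Unset, True, False };

    static bool shouldRunGlobalMerge(BoolOrDefault Opt, bool IsAggressiveOpt) {
      // An explicit request always wins; otherwise keep the old -O3-only default.
      if (Opt == BoolOrDefault::True)
        return true;
      if (Opt == BoolOrDefault::False)
        return false;
      return IsAggressiveOpt;
    }

    int main() {
      std::printf("unset at -O2:    %d\n", shouldRunGlobalMerge(BoolOrDefault::Unset, false));
      std::printf("unset at -O3:    %d\n", shouldRunGlobalMerge(BoolOrDefault::Unset, true));
      std::printf("forced on at -O0: %d\n", shouldRunGlobalMerge(BoolOrDefault::True, false));
    }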
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 1219ffc..063c714 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1972,7 +1972,8 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64PRFM::PRFMMapper();
- StringRef Name = Mapper.toString(MCE->getValue(), Valid);
+ StringRef Name =
+ Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid);
Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name,
S, getContext()));
return MatchOperand_Success;
@@ -1985,7 +1986,8 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64PRFM::PRFMMapper();
- unsigned prfop = Mapper.fromString(Tok.getString(), Valid);
+ unsigned prfop =
+ Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid);
if (!Valid) {
TokError("pre-fetch hint expected");
return MatchOperand_ParseFail;
@@ -2090,15 +2092,16 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
if (Tok.is(AsmToken::Real)) {
APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ if (isNegative)
+ RealVal.changeSign();
+
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
- // If we had a '-' in front, toggle the sign bit.
- IntVal ^= (uint64_t)isNegative << 63;
int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal));
Parser.Lex(); // Eat the token.
// Check for out of range values. As an exception, we let Zero through,
// as we handle that special case in post-processing before matching in
// order to use the zero register for it.
- if (Val == -1 && !RealVal.isZero()) {
+ if (Val == -1 && !RealVal.isPosZero()) {
TokError("expected compatible register or floating-point constant");
return MatchOperand_ParseFail;
}
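[Note] The change above stops toggling the sign bit by hand on the bitcast integer and instead negates the APFloat itself before conversion, and it tightens the zero special case from isZero() to isPosZero(): "-0.0" has a different bit pattern from "+0.0" and cannot be materialized through the zero register, so it must now be rejected like any other unencodable constant. A small standalone illustration of why the two zeroes differ at the bit level (std::signbit and a memcpy bitcast are standard C++, not LLVM API):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Bit-exact view of a double, the moral equivalent of bitcastToAPInt().
    static uint64_t bitsOf(double D) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof(Bits));
      return Bits;
    }

    int main() {
      double Pos = 0.0;
      double Neg = -Pos;                 // same role as RealVal.changeSign()
      std::printf("+0.0 bits: %016llx signbit=%d\n",
                  (unsigned long long)bitsOf(Pos), std::signbit(Pos));
      std::printf("-0.0 bits: %016llx signbit=%d\n",
                  (unsigned long long)bitsOf(Neg), std::signbit(Neg));
      // The two compare equal as values, which is why isZero() was too permissive.
      std::printf("Pos == Neg: %d\n", Pos == Neg);
    }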
@@ -2597,7 +2600,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
}
bool Valid;
auto Mapper = AArch64DB::DBarrierMapper();
- StringRef Name = Mapper.toString(MCE->getValue(), Valid);
+ StringRef Name =
+ Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid);
Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name,
ExprLoc, getContext()));
return MatchOperand_Success;
@@ -2610,7 +2614,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64DB::DBarrierMapper();
- unsigned Opt = Mapper.fromString(Tok.getString(), Valid);
+ unsigned Opt =
+ Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid);
if (!Valid) {
TokError("invalid barrier option name");
return MatchOperand_ParseFail;
@@ -2638,18 +2643,21 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
return MatchOperand_NoMatch;
bool IsKnown;
- auto MRSMapper = AArch64SysReg::MRSMapper(STI.getFeatureBits());
- uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), IsKnown);
+ auto MRSMapper = AArch64SysReg::MRSMapper();
+ uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), STI.getFeatureBits(),
+ IsKnown);
assert(IsKnown == (MRSReg != -1U) &&
"register should be -1 if and only if it's unknown");
- auto MSRMapper = AArch64SysReg::MSRMapper(STI.getFeatureBits());
- uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), IsKnown);
+ auto MSRMapper = AArch64SysReg::MSRMapper();
+ uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), STI.getFeatureBits(),
+ IsKnown);
assert(IsKnown == (MSRReg != -1U) &&
"register should be -1 if and only if it's unknown");
auto PStateMapper = AArch64PState::PStateMapper();
- uint32_t PStateField = PStateMapper.fromString(Tok.getString(), IsKnown);
+ uint32_t PStateField =
+ PStateMapper.fromString(Tok.getString(), STI.getFeatureBits(), IsKnown);
assert(IsKnown == (PStateField != -1U) &&
"register should be -1 if and only if it's unknown");
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index fb25089..1c8c0a66 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1102,6 +1102,12 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
case AArch64::STLRW:
case AArch64::STLRB:
case AArch64::STLRH:
+ case AArch64::STLLRW:
+ case AArch64::STLLRB:
+ case AArch64::STLLRH:
+ case AArch64::LDLARW:
+ case AArch64::LDLARB:
+ case AArch64::LDLARH:
DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
break;
case AArch64::STLXRX:
@@ -1112,6 +1118,8 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
case AArch64::LDAXRX:
case AArch64::LDXRX:
case AArch64::STLRX:
+ case AArch64::LDLARX:
+ case AArch64::STLLRX:
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
break;
case AArch64::STLXPW:
@@ -1504,7 +1512,10 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
Inst.addOperand(MCOperand::CreateImm(crm));
bool ValidNamed;
- (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed);
+ const AArch64Disassembler *Dis =
+ static_cast<const AArch64Disassembler *>(Decoder);
+ (void)AArch64PState::PStateMapper().toString(pstate_field,
+ Dis->getSubtargetInfo().getFeatureBits(), ValidNamed);
return ValidNamed ? Success : Fail;
}
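[Note] The decoder callbacks only receive an opaque Decoder pointer, so the hunk above recovers the concrete AArch64Disassembler with a static_cast in order to hand the subtarget's feature bits to the PState mapper; only then can a v8.1a-only name such as "pan" be accepted or rejected per target. A minimal sketch of that "per-target state behind an opaque base pointer" pattern; all names below are illustrative, not LLVM's:

    #include <cstdio>

    struct FeatureSet { unsigned long long Bits; };

    // Base class the decode callback is written against.
    struct DisassemblerBase { virtual ~DisassemblerBase() {} };

    // Concrete target disassembler carrying per-subtarget state.
    struct TargetDisassembler : DisassemblerBase {
      FeatureSet Features;
      explicit TargetDisassembler(FeatureSet F) : Features(F) {}
    };

    // Decode hook that only sees the base pointer, like the Decoder argument.
    static bool decodeNeedsFeature(const DisassemblerBase *Decoder,
                                   unsigned long long RequiredBits) {
      const TargetDisassembler *TD =
          static_cast<const TargetDisassembler *>(Decoder);
      return (TD->Features.Bits & RequiredBits) == RequiredBits;
    }

    int main() {
      TargetDisassembler D(FeatureSet{0x2});       // pretend bit 1 is "v8.1a"
      std::printf("v8.1a-only field decodes: %d\n", decodeNeedsFeature(&D, 0x2));
      std::printf("some other extension:     %d\n", decodeNeedsFeature(&D, 0x4));
    }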
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 46a1d79..febd332 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -34,18 +34,13 @@ using namespace llvm;
AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : MCInstPrinter(MAI, MII, MRI) {
- // Initialize the set of available features.
- setAvailableFeatures(STI.getFeatureBits());
-}
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : AArch64InstPrinter(MAI, MII, MRI, STI) {}
+ const MCRegisterInfo &MRI)
+ : AArch64InstPrinter(MAI, MII, MRI) {}
void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
// This is for .cfi directives.
@@ -53,7 +48,8 @@ void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
// Check for special encodings and print the canonical alias instead.
unsigned Opcode = MI->getOpcode();
@@ -210,8 +206,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- if (!printAliasInstr(MI, O))
- printInstruction(MI, O);
+ if (!printAliasInstr(MI, STI, O))
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
}
@@ -614,7 +610,8 @@ static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
}
void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
unsigned Opcode = MI->getOpcode();
StringRef Layout, Mnemonic;
@@ -624,7 +621,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
<< getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", ";
unsigned ListOpNum = IsTbx ? 2 : 1;
- printVectorList(MI, ListOpNum, O, "");
+ printVectorList(MI, ListOpNum, STI, O, "");
O << ", "
<< getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg);
@@ -638,7 +635,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// Now onto the operands: first a vector list with possible lane
// specifier. E.g. { v0 }[2]
int OpNum = LdStDesc->ListOperand;
- printVectorList(MI, OpNum++, O, "");
+ printVectorList(MI, OpNum++, STI, O, "");
if (LdStDesc->HasLane)
O << '[' << MI->getOperand(OpNum++).getImm() << ']';
@@ -662,7 +659,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- AArch64InstPrinter::printInst(MI, O, Annot);
+ AArch64InstPrinter::printInst(MI, O, Annot, STI);
}
bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
@@ -889,6 +886,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
}
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
@@ -903,6 +901,7 @@ void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
O << format("#%#llx", Op.getImm());
@@ -922,6 +921,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isReg() && "Non-register vreg operand!");
@@ -930,6 +930,7 @@ void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isImm() && "System instruction C[nm] operands must be immediates!");
@@ -937,6 +938,7 @@ void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.isImm()) {
@@ -946,18 +948,19 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
O << '#' << Val;
if (Shift != 0)
- printShifter(MI, OpNum + 1, O);
+ printShifter(MI, OpNum + 1, STI, O);
if (CommentStream)
*CommentStream << '=' << (Val << Shift) << '\n';
} else {
assert(MO.isExpr() && "Unexpected operand type!");
O << *MO.getExpr();
- printShifter(MI, OpNum + 1, O);
+ printShifter(MI, OpNum + 1, STI, O);
}
}
void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
uint64_t Val = MI->getOperand(OpNum).getImm();
O << "#0x";
@@ -965,6 +968,7 @@ void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
uint64_t Val = MI->getOperand(OpNum).getImm();
O << "#0x";
@@ -972,6 +976,7 @@ void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNum).getImm();
// LSL #0 should not be printed.
@@ -983,18 +988,21 @@ void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << getRegisterName(MI->getOperand(OpNum).getReg());
- printShifter(MI, OpNum + 1, O);
+ printShifter(MI, OpNum + 1, STI, O);
}
void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << getRegisterName(MI->getOperand(OpNum).getReg());
- printArithExtend(MI, OpNum + 1, O);
+ printArithExtend(MI, OpNum + 1, STI, O);
}
void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNum).getImm();
AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val);
@@ -1038,24 +1046,28 @@ void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
O << AArch64CC::getCondCodeName(CC);
}
void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC));
}
void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']';
}
template<int Scale>
void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << '#' << Scale * MI->getOperand(OpNum).getImm();
}
@@ -1085,10 +1097,12 @@ void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned prfop = MI->getOperand(OpNum).getImm();
bool Valid;
- StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, Valid);
+ StringRef Name =
+ AArch64PRFM::PRFMMapper().toString(prfop, STI.getFeatureBits(), Valid);
if (Valid)
O << Name;
else
@@ -1096,6 +1110,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
float FPImm =
@@ -1151,6 +1166,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
}
void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O,
StringRef LayoutSuffix) {
unsigned Reg = MI->getOperand(OpNum).getReg();
@@ -1193,14 +1209,17 @@ void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
O << " }";
}
-void AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
- printVectorList(MI, OpNum, O, "");
+void
+AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printVectorList(MI, OpNum, STI, O, "");
}
template <unsigned NumLanes, char LaneKind>
void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
std::string Suffix(".");
if (NumLanes)
@@ -1208,15 +1227,17 @@ void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
else
Suffix += LaneKind;
- printVectorList(MI, OpNum, O, Suffix);
+ printVectorList(MI, OpNum, STI, O, Suffix);
}
void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -1241,6 +1262,7 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -1256,6 +1278,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
unsigned Opcode = MI->getOpcode();
@@ -1263,9 +1286,11 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
bool Valid;
StringRef Name;
if (Opcode == AArch64::ISB)
- Name = AArch64ISB::ISBMapper().toString(Val, Valid);
+ Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(),
+ Valid);
else
- Name = AArch64DB::DBarrierMapper().toString(Val, Valid);
+ Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(),
+ Valid);
if (Valid)
O << Name;
else
@@ -1273,31 +1298,35 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures());
- std::string Name = Mapper.toString(Val);
+ auto Mapper = AArch64SysReg::MRSMapper();
+ std::string Name = Mapper.toString(Val, STI.getFeatureBits());
O << StringRef(Name).upper();
}
void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures());
- std::string Name = Mapper.toString(Val);
+ auto Mapper = AArch64SysReg::MSRMapper();
+ std::string Name = Mapper.toString(Val, STI.getFeatureBits());
O << StringRef(Name).upper();
}
void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
bool Valid;
- StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid);
+ StringRef Name =
+ AArch64PState::PStateMapper().toString(Val, STI.getFeatureBits(), Valid);
if (Valid)
O << StringRef(Name.str()).upper();
else
@@ -1305,6 +1334,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned RawVal = MI->getOperand(OpNo).getImm();
uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal);
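[Note] The whole InstPrinter diff follows one pattern: the printer no longer captures feature bits in its constructor (setAvailableFeatures is gone) and instead every print method takes the MCSubtargetInfo for the instruction being printed, so one printer instance can serve output that mixes subtargets. A compact sketch of that refactor, threading the context through each call rather than caching it in the object; the class and field names are illustrative:

    #include <cstdio>
    #include <string>

    struct SubtargetInfo { unsigned long long FeatureBits; };

    // After the refactor: the printer holds no subtarget state and the caller
    // supplies the SubtargetInfo alongside each instruction.
    class Printer {
    public:
      void printInst(const std::string &Mnemonic, const SubtargetInfo &STI) const {
        const bool HasExt = (STI.FeatureBits & 0x2) != 0;   // pretend bit 1 = v8.1a
        std::printf("%s%s\n", Mnemonic.c_str(), HasExt ? "   // v8.1a form" : "");
      }
    };

    int main() {
      Printer P;
      SubtargetInfo Plain{0x0}, V81a{0x2};
      P.printInst("msr pan, #1", V81a);    // same printer, different subtargets
      P.printInst("msr spsel, #1", Plain);
    }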
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 5f51621..c2077a0 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -26,16 +26,21 @@ class MCOperand;
class AArch64InstPrinter : public MCInstPrinter {
public:
AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
- virtual void printInstruction(const MCInst *MI, raw_ostream &O);
- virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ virtual void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
virtual StringRef getRegName(unsigned RegNo) const {
return getRegisterName(RegNo);
}
@@ -45,90 +50,126 @@ public:
protected:
bool printSysAlias(const MCInst *MI, raw_ostream &O);
// Operand printers
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printHexImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printHexImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
raw_ostream &O);
- template<int Amount>
- void printPostIncOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ template <int Amount>
+ void printPostIncOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printPostIncOperand(MI, OpNo, Amount, O);
}
- void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printArithExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVRegOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSysCROperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddSubImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printLogicalImm32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printLogicalImm64(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printShifter(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printShiftedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExtendedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printArithExtend(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O,
char SrcRegKind, unsigned Width);
template <char SrcRegKind, unsigned Width>
- void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ void printMemExtend(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printMemExtend(MI, OpNum, O, SrcRegKind, Width);
}
- void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printInverseCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAlignedLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printInverseCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAlignedLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale,
raw_ostream &O);
void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale,
raw_ostream &O);
- template<int Scale>
- void printUImm12Offset(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ template <int Scale>
+ void printUImm12Offset(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printUImm12Offset(MI, OpNum, Scale, O);
}
- template<int BitWidth>
- void printAMIndexedWB(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ template <int BitWidth>
+ void printAMIndexedWB(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printAMIndexedWB(MI, OpNum, BitWidth / 8, O);
}
- void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAMNoIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- template<int Scale>
- void printImmScale(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ template <int Scale>
+ void printImmScale(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPrefetchOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O,
+ void printVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O,
StringRef LayoutSuffix);
/// Print a list of vector registers where the type suffix is implicit
/// (i.e. attached to the instruction rather than the registers).
void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
template <unsigned NumLanes, char LaneKind>
- void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSystemPStateField(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printTypedVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAdrpLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBarrierOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMSRSystemRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMRSSystemRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSystemPStateField(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSIMDType10Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
};
class AArch64AppleInstPrinter : public AArch64InstPrinter {
public:
AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
- void printInstruction(const MCInst *MI, raw_ostream &O) override;
- bool printAliasInstr(const MCInst *MI, raw_ostream &O) override;
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O) override;
+ bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O) override;
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI,
raw_ostream &O) override;
StringRef getRegName(unsigned RegNo) const override {
return getRegisterName(RegNo);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 84b63a0..e5eb90c 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -313,7 +313,7 @@ public:
DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI)
: AArch64AsmBackend(T), MRI(MRI) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64,
MachO::CPU_SUBTYPE_ARM64_ALL);
}
@@ -461,7 +461,7 @@ public:
ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian)
: AArch64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian);
}
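[Note] createObjectWriter now takes a raw_pwrite_stream rather than a plain raw_ostream: object writers go back and patch earlier bytes (header fields, section sizes) once the rest of the file has been emitted, and raw_pwrite_stream is the stream type that adds that positional-write capability. A minimal sketch of the back-patching pattern the stricter type exists to support, using plain iostream stand-ins rather than LLVM's classes:

    #include <cstdint>
    #include <fstream>
    #include <vector>

    // Emit a toy "object file": a 4-byte size header we only know at the end.
    int main() {
      std::fstream Out("toy.o", std::ios::binary | std::ios::in |
                                    std::ios::out | std::ios::trunc);
      uint32_t Placeholder = 0;
      Out.write(reinterpret_cast<const char *>(&Placeholder), 4); // reserve header

      std::vector<char> Body = {'c', 'o', 'd', 'e'};
      Out.write(Body.data(), static_cast<std::streamsize>(Body.size()));

      // Back-patch the header; this is the kind of write-at-offset that
      // pwrite() on raw_pwrite_stream provides without re-buffering everything.
      uint32_t Size = static_cast<uint32_t>(Body.size());
      Out.seekp(0);
      Out.write(reinterpret_cast<const char *>(&Size), 4);
      return 0;
    }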
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 5ea49c3..1f516d1 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -26,7 +26,7 @@ class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
public:
AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian);
- virtual ~AArch64ELFObjectWriter();
+ ~AArch64ELFObjectWriter() override;
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
@@ -248,9 +248,9 @@ unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
llvm_unreachable("Unimplemented fixup -> relocation");
}
-MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI,
- bool IsLittleEndian) {
+MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI,
+ bool IsLittleEndian) {
MCELFObjectTargetWriter *MOTW =
new AArch64ELFObjectWriter(OSABI, IsLittleEndian);
return createELFObjectWriter(MOTW, OS, IsLittleEndian);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 8f780d2..540d1fc 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -89,12 +89,12 @@ class AArch64ELFStreamer : public MCELFStreamer {
public:
friend class AArch64TargetELFStreamer;
- AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter)
+ AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter)
: MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0),
LastEMS(EMS_None) {}
- ~AArch64ELFStreamer() {}
+ ~AArch64ELFStreamer() override {}
void ChangeSection(const MCSection *Section,
const MCExpr *Subsection) override {
@@ -211,8 +211,8 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
}
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll) {
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
index 71b05cc..ef48203 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -19,8 +19,8 @@
namespace llvm {
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
}
#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 9ea49f0..fd4dc47 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -40,7 +40,7 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
public:
AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {}
- ~AArch64MCCodeEmitter() {}
+ ~AArch64MCCodeEmitter() override {}
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 38b399d..afad674 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -109,29 +109,28 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
+static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new AArch64InstPrinter(MAI, MII, MRI, STI);
+ return new AArch64InstPrinter(MAI, MII, MRI);
if (SyntaxVariant == 1)
- return new AArch64AppleInstPrinter(MAI, MII, MRI, STI);
+ return new AArch64AppleInstPrinter(MAI, MII, MRI);
return nullptr;
}
static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
- MCAsmBackend &TAB, raw_ostream &OS,
+ MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll) {
return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
}
static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
bool DWARFMustBeAtTheEnd) {
return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
DWARFMustBeAtTheEnd,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 7ce303b..4705bdf 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -33,6 +33,7 @@ class StringRef;
class Target;
class Triple;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheAArch64leTarget;
extern Target TheAArch64beTarget;
@@ -48,11 +49,13 @@ MCAsmBackend *createAArch64beAsmBackend(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU);
-MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
+MCObjectWriter *createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI,
bool IsLittleEndian);
-MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
- uint32_t CPUSubtype);
+MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 0d9385d..61649c4 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -413,9 +413,9 @@ void AArch64MachObjectWriter::RecordRelocation(
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS,
- uint32_t CPUType,
- uint32_t CPUSubtype) {
+MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_pwrite_stream &OS,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
return createMachObjectWriter(
new AArch64MachObjectWriter(CPUType, CPUSubtype), OS,
/*IsLittleEndian=*/true);
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 160c1c5..8696163 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -18,9 +18,10 @@
using namespace llvm;
-StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
+StringRef AArch64NamedImmMapper::toString(uint32_t Value, uint64_t FeatureBits,
+ bool &Valid) const {
for (unsigned i = 0; i < NumMappings; ++i) {
- if (Mappings[i].Value == Value) {
+ if (Mappings[i].isValueEqual(Value, FeatureBits)) {
Valid = true;
return Mappings[i].Name;
}
@@ -30,10 +31,11 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
return StringRef();
}
-uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
+uint32_t AArch64NamedImmMapper::fromString(StringRef Name, uint64_t FeatureBits,
+ bool &Valid) const {
std::string LowerCaseName = Name.lower();
for (unsigned i = 0; i < NumMappings; ++i) {
- if (Mappings[i].Name == LowerCaseName) {
+ if (Mappings[i].isNameEqual(LowerCaseName, FeatureBits)) {
Valid = true;
return Mappings[i].Value;
}
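[Note] Each Mapping row now carries a third field, the feature bits required for that name to exist: 0 for entries that are always valid, AArch64::HasV8_1aOps for v8.1a-only entries such as "pan". toString/fromString only accept a row whose requirement is satisfied by the caller's feature bits, which is why both gained a FeatureBits parameter. A small standalone sketch of that gated table lookup; the structure, values, and feature bit below are illustrative:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    struct Mapping {
      const char *Name;
      uint32_t Value;
      uint64_t RequiredFeatures;   // 0 == always available
    };

    static const uint64_t HasV8_1a = 1ull << 0;   // pretend feature bit

    static const Mapping PStateMappings[] = {
        {"spsel",   0x05, 0},
        {"daifset", 0x1e, 0},
        {"daifclr", 0x1f, 0},
        {"pan",     0x04, HasV8_1a},   // only nameable when v8.1a is enabled
    };

    // Rough equivalent of fromString(Name, FeatureBits, Valid).
    static bool lookupByName(const char *Name, uint64_t FeatureBits,
                             uint32_t &ValueOut) {
      for (const Mapping &M : PStateMappings) {
        bool FeatureOK = (FeatureBits & M.RequiredFeatures) == M.RequiredFeatures;
        if (FeatureOK && std::strcmp(M.Name, Name) == 0) {
          ValueOut = M.Value;
          return true;
        }
      }
      return false;
    }

    int main() {
      uint32_t V;
      std::printf("pan without v8.1a: %d\n", lookupByName("pan", 0, V));
      std::printf("pan with v8.1a:    %d\n", lookupByName("pan", HasV8_1a, V));
    }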
@@ -48,744 +50,776 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const {
}
const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = {
- {"s1e1r", S1E1R},
- {"s1e2r", S1E2R},
- {"s1e3r", S1E3R},
- {"s1e1w", S1E1W},
- {"s1e2w", S1E2W},
- {"s1e3w", S1E3W},
- {"s1e0r", S1E0R},
- {"s1e0w", S1E0W},
- {"s12e1r", S12E1R},
- {"s12e1w", S12E1W},
- {"s12e0r", S12E0R},
- {"s12e0w", S12E0W},
+ {"s1e1r", S1E1R, 0},
+ {"s1e2r", S1E2R, 0},
+ {"s1e3r", S1E3R, 0},
+ {"s1e1w", S1E1W, 0},
+ {"s1e2w", S1E2W, 0},
+ {"s1e3w", S1E3W, 0},
+ {"s1e0r", S1E0R, 0},
+ {"s1e0w", S1E0W, 0},
+ {"s12e1r", S12E1R, 0},
+ {"s12e1w", S12E1W, 0},
+ {"s12e0r", S12E0R, 0},
+ {"s12e0w", S12E0W, 0},
};
AArch64AT::ATMapper::ATMapper()
: AArch64NamedImmMapper(ATMappings, 0) {}
const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = {
- {"oshld", OSHLD},
- {"oshst", OSHST},
- {"osh", OSH},
- {"nshld", NSHLD},
- {"nshst", NSHST},
- {"nsh", NSH},
- {"ishld", ISHLD},
- {"ishst", ISHST},
- {"ish", ISH},
- {"ld", LD},
- {"st", ST},
- {"sy", SY}
+ {"oshld", OSHLD, 0},
+ {"oshst", OSHST, 0},
+ {"osh", OSH, 0},
+ {"nshld", NSHLD, 0},
+ {"nshst", NSHST, 0},
+ {"nsh", NSH, 0},
+ {"ishld", ISHLD, 0},
+ {"ishst", ISHST, 0},
+ {"ish", ISH, 0},
+ {"ld", LD, 0},
+ {"st", ST, 0},
+ {"sy", SY, 0}
};
AArch64DB::DBarrierMapper::DBarrierMapper()
: AArch64NamedImmMapper(DBarrierMappings, 16u) {}
const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = {
- {"zva", ZVA},
- {"ivac", IVAC},
- {"isw", ISW},
- {"cvac", CVAC},
- {"csw", CSW},
- {"cvau", CVAU},
- {"civac", CIVAC},
- {"cisw", CISW}
+ {"zva", ZVA, 0},
+ {"ivac", IVAC, 0},
+ {"isw", ISW, 0},
+ {"cvac", CVAC, 0},
+ {"csw", CSW, 0},
+ {"cvau", CVAU, 0},
+ {"civac", CIVAC, 0},
+ {"cisw", CISW, 0}
};
AArch64DC::DCMapper::DCMapper()
: AArch64NamedImmMapper(DCMappings, 0) {}
const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = {
- {"ialluis", IALLUIS},
- {"iallu", IALLU},
- {"ivau", IVAU}
+ {"ialluis", IALLUIS, 0},
+ {"iallu", IALLU, 0},
+ {"ivau", IVAU, 0}
};
AArch64IC::ICMapper::ICMapper()
: AArch64NamedImmMapper(ICMappings, 0) {}
const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = {
- {"sy", SY},
+ {"sy", SY, 0},
};
AArch64ISB::ISBMapper::ISBMapper()
: AArch64NamedImmMapper(ISBMappings, 16) {}
const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = {
- {"pldl1keep", PLDL1KEEP},
- {"pldl1strm", PLDL1STRM},
- {"pldl2keep", PLDL2KEEP},
- {"pldl2strm", PLDL2STRM},
- {"pldl3keep", PLDL3KEEP},
- {"pldl3strm", PLDL3STRM},
- {"plil1keep", PLIL1KEEP},
- {"plil1strm", PLIL1STRM},
- {"plil2keep", PLIL2KEEP},
- {"plil2strm", PLIL2STRM},
- {"plil3keep", PLIL3KEEP},
- {"plil3strm", PLIL3STRM},
- {"pstl1keep", PSTL1KEEP},
- {"pstl1strm", PSTL1STRM},
- {"pstl2keep", PSTL2KEEP},
- {"pstl2strm", PSTL2STRM},
- {"pstl3keep", PSTL3KEEP},
- {"pstl3strm", PSTL3STRM}
+ {"pldl1keep", PLDL1KEEP, 0},
+ {"pldl1strm", PLDL1STRM, 0},
+ {"pldl2keep", PLDL2KEEP, 0},
+ {"pldl2strm", PLDL2STRM, 0},
+ {"pldl3keep", PLDL3KEEP, 0},
+ {"pldl3strm", PLDL3STRM, 0},
+ {"plil1keep", PLIL1KEEP, 0},
+ {"plil1strm", PLIL1STRM, 0},
+ {"plil2keep", PLIL2KEEP, 0},
+ {"plil2strm", PLIL2STRM, 0},
+ {"plil3keep", PLIL3KEEP, 0},
+ {"plil3strm", PLIL3STRM, 0},
+ {"pstl1keep", PSTL1KEEP, 0},
+ {"pstl1strm", PSTL1STRM, 0},
+ {"pstl2keep", PSTL2KEEP, 0},
+ {"pstl2strm", PSTL2STRM, 0},
+ {"pstl3keep", PSTL3KEEP, 0},
+ {"pstl3strm", PSTL3STRM, 0}
};
AArch64PRFM::PRFMMapper::PRFMMapper()
: AArch64NamedImmMapper(PRFMMappings, 32) {}
const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = {
- {"spsel", SPSel},
- {"daifset", DAIFSet},
- {"daifclr", DAIFClr}
+ {"spsel", SPSel, 0},
+ {"daifset", DAIFSet, 0},
+ {"daifclr", DAIFClr, 0},
+
+ // v8.1a "Privileged Access Never" extension-specific PStates
+ {"pan", PAN, AArch64::HasV8_1aOps},
};
AArch64PState::PStateMapper::PStateMapper()
: AArch64NamedImmMapper(PStateMappings, 0) {}
const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = {
- {"mdccsr_el0", MDCCSR_EL0},
- {"dbgdtrrx_el0", DBGDTRRX_EL0},
- {"mdrar_el1", MDRAR_EL1},
- {"oslsr_el1", OSLSR_EL1},
- {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1},
- {"pmceid0_el0", PMCEID0_EL0},
- {"pmceid1_el0", PMCEID1_EL0},
- {"midr_el1", MIDR_EL1},
- {"ccsidr_el1", CCSIDR_EL1},
- {"clidr_el1", CLIDR_EL1},
- {"ctr_el0", CTR_EL0},
- {"mpidr_el1", MPIDR_EL1},
- {"revidr_el1", REVIDR_EL1},
- {"aidr_el1", AIDR_EL1},
- {"dczid_el0", DCZID_EL0},
- {"id_pfr0_el1", ID_PFR0_EL1},
- {"id_pfr1_el1", ID_PFR1_EL1},
- {"id_dfr0_el1", ID_DFR0_EL1},
- {"id_afr0_el1", ID_AFR0_EL1},
- {"id_mmfr0_el1", ID_MMFR0_EL1},
- {"id_mmfr1_el1", ID_MMFR1_EL1},
- {"id_mmfr2_el1", ID_MMFR2_EL1},
- {"id_mmfr3_el1", ID_MMFR3_EL1},
- {"id_isar0_el1", ID_ISAR0_EL1},
- {"id_isar1_el1", ID_ISAR1_EL1},
- {"id_isar2_el1", ID_ISAR2_EL1},
- {"id_isar3_el1", ID_ISAR3_EL1},
- {"id_isar4_el1", ID_ISAR4_EL1},
- {"id_isar5_el1", ID_ISAR5_EL1},
- {"id_aa64pfr0_el1", ID_A64PFR0_EL1},
- {"id_aa64pfr1_el1", ID_A64PFR1_EL1},
- {"id_aa64dfr0_el1", ID_A64DFR0_EL1},
- {"id_aa64dfr1_el1", ID_A64DFR1_EL1},
- {"id_aa64afr0_el1", ID_A64AFR0_EL1},
- {"id_aa64afr1_el1", ID_A64AFR1_EL1},
- {"id_aa64isar0_el1", ID_A64ISAR0_EL1},
- {"id_aa64isar1_el1", ID_A64ISAR1_EL1},
- {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1},
- {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1},
- {"mvfr0_el1", MVFR0_EL1},
- {"mvfr1_el1", MVFR1_EL1},
- {"mvfr2_el1", MVFR2_EL1},
- {"rvbar_el1", RVBAR_EL1},
- {"rvbar_el2", RVBAR_EL2},
- {"rvbar_el3", RVBAR_EL3},
- {"isr_el1", ISR_EL1},
- {"cntpct_el0", CNTPCT_EL0},
- {"cntvct_el0", CNTVCT_EL0},
+ {"mdccsr_el0", MDCCSR_EL0, 0},
+ {"dbgdtrrx_el0", DBGDTRRX_EL0, 0},
+ {"mdrar_el1", MDRAR_EL1, 0},
+ {"oslsr_el1", OSLSR_EL1, 0},
+ {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1, 0},
+ {"pmceid0_el0", PMCEID0_EL0, 0},
+ {"pmceid1_el0", PMCEID1_EL0, 0},
+ {"midr_el1", MIDR_EL1, 0},
+ {"ccsidr_el1", CCSIDR_EL1, 0},
+ {"clidr_el1", CLIDR_EL1, 0},
+ {"ctr_el0", CTR_EL0, 0},
+ {"mpidr_el1", MPIDR_EL1, 0},
+ {"revidr_el1", REVIDR_EL1, 0},
+ {"aidr_el1", AIDR_EL1, 0},
+ {"dczid_el0", DCZID_EL0, 0},
+ {"id_pfr0_el1", ID_PFR0_EL1, 0},
+ {"id_pfr1_el1", ID_PFR1_EL1, 0},
+ {"id_dfr0_el1", ID_DFR0_EL1, 0},
+ {"id_afr0_el1", ID_AFR0_EL1, 0},
+ {"id_mmfr0_el1", ID_MMFR0_EL1, 0},
+ {"id_mmfr1_el1", ID_MMFR1_EL1, 0},
+ {"id_mmfr2_el1", ID_MMFR2_EL1, 0},
+ {"id_mmfr3_el1", ID_MMFR3_EL1, 0},
+ {"id_isar0_el1", ID_ISAR0_EL1, 0},
+ {"id_isar1_el1", ID_ISAR1_EL1, 0},
+ {"id_isar2_el1", ID_ISAR2_EL1, 0},
+ {"id_isar3_el1", ID_ISAR3_EL1, 0},
+ {"id_isar4_el1", ID_ISAR4_EL1, 0},
+ {"id_isar5_el1", ID_ISAR5_EL1, 0},
+ {"id_aa64pfr0_el1", ID_A64PFR0_EL1, 0},
+ {"id_aa64pfr1_el1", ID_A64PFR1_EL1, 0},
+ {"id_aa64dfr0_el1", ID_A64DFR0_EL1, 0},
+ {"id_aa64dfr1_el1", ID_A64DFR1_EL1, 0},
+ {"id_aa64afr0_el1", ID_A64AFR0_EL1, 0},
+ {"id_aa64afr1_el1", ID_A64AFR1_EL1, 0},
+ {"id_aa64isar0_el1", ID_A64ISAR0_EL1, 0},
+ {"id_aa64isar1_el1", ID_A64ISAR1_EL1, 0},
+ {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1, 0},
+ {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1, 0},
+ {"mvfr0_el1", MVFR0_EL1, 0},
+ {"mvfr1_el1", MVFR1_EL1, 0},
+ {"mvfr2_el1", MVFR2_EL1, 0},
+ {"rvbar_el1", RVBAR_EL1, 0},
+ {"rvbar_el2", RVBAR_EL2, 0},
+ {"rvbar_el3", RVBAR_EL3, 0},
+ {"isr_el1", ISR_EL1, 0},
+ {"cntpct_el0", CNTPCT_EL0, 0},
+ {"cntvct_el0", CNTVCT_EL0, 0},
// Trace registers
- {"trcstatr", TRCSTATR},
- {"trcidr8", TRCIDR8},
- {"trcidr9", TRCIDR9},
- {"trcidr10", TRCIDR10},
- {"trcidr11", TRCIDR11},
- {"trcidr12", TRCIDR12},
- {"trcidr13", TRCIDR13},
- {"trcidr0", TRCIDR0},
- {"trcidr1", TRCIDR1},
- {"trcidr2", TRCIDR2},
- {"trcidr3", TRCIDR3},
- {"trcidr4", TRCIDR4},
- {"trcidr5", TRCIDR5},
- {"trcidr6", TRCIDR6},
- {"trcidr7", TRCIDR7},
- {"trcoslsr", TRCOSLSR},
- {"trcpdsr", TRCPDSR},
- {"trcdevaff0", TRCDEVAFF0},
- {"trcdevaff1", TRCDEVAFF1},
- {"trclsr", TRCLSR},
- {"trcauthstatus", TRCAUTHSTATUS},
- {"trcdevarch", TRCDEVARCH},
- {"trcdevid", TRCDEVID},
- {"trcdevtype", TRCDEVTYPE},
- {"trcpidr4", TRCPIDR4},
- {"trcpidr5", TRCPIDR5},
- {"trcpidr6", TRCPIDR6},
- {"trcpidr7", TRCPIDR7},
- {"trcpidr0", TRCPIDR0},
- {"trcpidr1", TRCPIDR1},
- {"trcpidr2", TRCPIDR2},
- {"trcpidr3", TRCPIDR3},
- {"trccidr0", TRCCIDR0},
- {"trccidr1", TRCCIDR1},
- {"trccidr2", TRCCIDR2},
- {"trccidr3", TRCCIDR3},
+ {"trcstatr", TRCSTATR, 0},
+ {"trcidr8", TRCIDR8, 0},
+ {"trcidr9", TRCIDR9, 0},
+ {"trcidr10", TRCIDR10, 0},
+ {"trcidr11", TRCIDR11, 0},
+ {"trcidr12", TRCIDR12, 0},
+ {"trcidr13", TRCIDR13, 0},
+ {"trcidr0", TRCIDR0, 0},
+ {"trcidr1", TRCIDR1, 0},
+ {"trcidr2", TRCIDR2, 0},
+ {"trcidr3", TRCIDR3, 0},
+ {"trcidr4", TRCIDR4, 0},
+ {"trcidr5", TRCIDR5, 0},
+ {"trcidr6", TRCIDR6, 0},
+ {"trcidr7", TRCIDR7, 0},
+ {"trcoslsr", TRCOSLSR, 0},
+ {"trcpdsr", TRCPDSR, 0},
+ {"trcdevaff0", TRCDEVAFF0, 0},
+ {"trcdevaff1", TRCDEVAFF1, 0},
+ {"trclsr", TRCLSR, 0},
+ {"trcauthstatus", TRCAUTHSTATUS, 0},
+ {"trcdevarch", TRCDEVARCH, 0},
+ {"trcdevid", TRCDEVID, 0},
+ {"trcdevtype", TRCDEVTYPE, 0},
+ {"trcpidr4", TRCPIDR4, 0},
+ {"trcpidr5", TRCPIDR5, 0},
+ {"trcpidr6", TRCPIDR6, 0},
+ {"trcpidr7", TRCPIDR7, 0},
+ {"trcpidr0", TRCPIDR0, 0},
+ {"trcpidr1", TRCPIDR1, 0},
+ {"trcpidr2", TRCPIDR2, 0},
+ {"trcpidr3", TRCPIDR3, 0},
+ {"trccidr0", TRCCIDR0, 0},
+ {"trccidr1", TRCCIDR1, 0},
+ {"trccidr2", TRCCIDR2, 0},
+ {"trccidr3", TRCCIDR3, 0},
// GICv3 registers
- {"icc_iar1_el1", ICC_IAR1_EL1},
- {"icc_iar0_el1", ICC_IAR0_EL1},
- {"icc_hppir1_el1", ICC_HPPIR1_EL1},
- {"icc_hppir0_el1", ICC_HPPIR0_EL1},
- {"icc_rpr_el1", ICC_RPR_EL1},
- {"ich_vtr_el2", ICH_VTR_EL2},
- {"ich_eisr_el2", ICH_EISR_EL2},
- {"ich_elsr_el2", ICH_ELSR_EL2}
+ {"icc_iar1_el1", ICC_IAR1_EL1, 0},
+ {"icc_iar0_el1", ICC_IAR0_EL1, 0},
+ {"icc_hppir1_el1", ICC_HPPIR1_EL1, 0},
+ {"icc_hppir0_el1", ICC_HPPIR0_EL1, 0},
+ {"icc_rpr_el1", ICC_RPR_EL1, 0},
+ {"ich_vtr_el2", ICH_VTR_EL2, 0},
+ {"ich_eisr_el2", ICH_EISR_EL2, 0},
+ {"ich_elsr_el2", ICH_ELSR_EL2, 0}
};
-AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits)
- : SysRegMapper(FeatureBits) {
+AArch64SysReg::MRSMapper::MRSMapper() {
InstMappings = &MRSMappings[0];
NumInstMappings = llvm::array_lengthof(MRSMappings);
}
const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = {
- {"dbgdtrtx_el0", DBGDTRTX_EL0},
- {"oslar_el1", OSLAR_EL1},
- {"pmswinc_el0", PMSWINC_EL0},
+ {"dbgdtrtx_el0", DBGDTRTX_EL0, 0},
+ {"oslar_el1", OSLAR_EL1, 0},
+ {"pmswinc_el0", PMSWINC_EL0, 0},
// Trace registers
- {"trcoslar", TRCOSLAR},
- {"trclar", TRCLAR},
+ {"trcoslar", TRCOSLAR, 0},
+ {"trclar", TRCLAR, 0},
// GICv3 registers
- {"icc_eoir1_el1", ICC_EOIR1_EL1},
- {"icc_eoir0_el1", ICC_EOIR0_EL1},
- {"icc_dir_el1", ICC_DIR_EL1},
- {"icc_sgi1r_el1", ICC_SGI1R_EL1},
- {"icc_asgi1r_el1", ICC_ASGI1R_EL1},
- {"icc_sgi0r_el1", ICC_SGI0R_EL1}
+ {"icc_eoir1_el1", ICC_EOIR1_EL1, 0},
+ {"icc_eoir0_el1", ICC_EOIR0_EL1, 0},
+ {"icc_dir_el1", ICC_DIR_EL1, 0},
+ {"icc_sgi1r_el1", ICC_SGI1R_EL1, 0},
+ {"icc_asgi1r_el1", ICC_ASGI1R_EL1, 0},
+ {"icc_sgi0r_el1", ICC_SGI0R_EL1, 0},
+
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ {"pan", PAN, AArch64::HasV8_1aOps},
};
-AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits)
- : SysRegMapper(FeatureBits) {
+AArch64SysReg::MSRMapper::MSRMapper() {
InstMappings = &MSRMappings[0];
NumInstMappings = llvm::array_lengthof(MSRMappings);
}
const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = {
- {"osdtrrx_el1", OSDTRRX_EL1},
- {"osdtrtx_el1", OSDTRTX_EL1},
- {"teecr32_el1", TEECR32_EL1},
- {"mdccint_el1", MDCCINT_EL1},
- {"mdscr_el1", MDSCR_EL1},
- {"dbgdtr_el0", DBGDTR_EL0},
- {"oseccr_el1", OSECCR_EL1},
- {"dbgvcr32_el2", DBGVCR32_EL2},
- {"dbgbvr0_el1", DBGBVR0_EL1},
- {"dbgbvr1_el1", DBGBVR1_EL1},
- {"dbgbvr2_el1", DBGBVR2_EL1},
- {"dbgbvr3_el1", DBGBVR3_EL1},
- {"dbgbvr4_el1", DBGBVR4_EL1},
- {"dbgbvr5_el1", DBGBVR5_EL1},
- {"dbgbvr6_el1", DBGBVR6_EL1},
- {"dbgbvr7_el1", DBGBVR7_EL1},
- {"dbgbvr8_el1", DBGBVR8_EL1},
- {"dbgbvr9_el1", DBGBVR9_EL1},
- {"dbgbvr10_el1", DBGBVR10_EL1},
- {"dbgbvr11_el1", DBGBVR11_EL1},
- {"dbgbvr12_el1", DBGBVR12_EL1},
- {"dbgbvr13_el1", DBGBVR13_EL1},
- {"dbgbvr14_el1", DBGBVR14_EL1},
- {"dbgbvr15_el1", DBGBVR15_EL1},
- {"dbgbcr0_el1", DBGBCR0_EL1},
- {"dbgbcr1_el1", DBGBCR1_EL1},
- {"dbgbcr2_el1", DBGBCR2_EL1},
- {"dbgbcr3_el1", DBGBCR3_EL1},
- {"dbgbcr4_el1", DBGBCR4_EL1},
- {"dbgbcr5_el1", DBGBCR5_EL1},
- {"dbgbcr6_el1", DBGBCR6_EL1},
- {"dbgbcr7_el1", DBGBCR7_EL1},
- {"dbgbcr8_el1", DBGBCR8_EL1},
- {"dbgbcr9_el1", DBGBCR9_EL1},
- {"dbgbcr10_el1", DBGBCR10_EL1},
- {"dbgbcr11_el1", DBGBCR11_EL1},
- {"dbgbcr12_el1", DBGBCR12_EL1},
- {"dbgbcr13_el1", DBGBCR13_EL1},
- {"dbgbcr14_el1", DBGBCR14_EL1},
- {"dbgbcr15_el1", DBGBCR15_EL1},
- {"dbgwvr0_el1", DBGWVR0_EL1},
- {"dbgwvr1_el1", DBGWVR1_EL1},
- {"dbgwvr2_el1", DBGWVR2_EL1},
- {"dbgwvr3_el1", DBGWVR3_EL1},
- {"dbgwvr4_el1", DBGWVR4_EL1},
- {"dbgwvr5_el1", DBGWVR5_EL1},
- {"dbgwvr6_el1", DBGWVR6_EL1},
- {"dbgwvr7_el1", DBGWVR7_EL1},
- {"dbgwvr8_el1", DBGWVR8_EL1},
- {"dbgwvr9_el1", DBGWVR9_EL1},
- {"dbgwvr10_el1", DBGWVR10_EL1},
- {"dbgwvr11_el1", DBGWVR11_EL1},
- {"dbgwvr12_el1", DBGWVR12_EL1},
- {"dbgwvr13_el1", DBGWVR13_EL1},
- {"dbgwvr14_el1", DBGWVR14_EL1},
- {"dbgwvr15_el1", DBGWVR15_EL1},
- {"dbgwcr0_el1", DBGWCR0_EL1},
- {"dbgwcr1_el1", DBGWCR1_EL1},
- {"dbgwcr2_el1", DBGWCR2_EL1},
- {"dbgwcr3_el1", DBGWCR3_EL1},
- {"dbgwcr4_el1", DBGWCR4_EL1},
- {"dbgwcr5_el1", DBGWCR5_EL1},
- {"dbgwcr6_el1", DBGWCR6_EL1},
- {"dbgwcr7_el1", DBGWCR7_EL1},
- {"dbgwcr8_el1", DBGWCR8_EL1},
- {"dbgwcr9_el1", DBGWCR9_EL1},
- {"dbgwcr10_el1", DBGWCR10_EL1},
- {"dbgwcr11_el1", DBGWCR11_EL1},
- {"dbgwcr12_el1", DBGWCR12_EL1},
- {"dbgwcr13_el1", DBGWCR13_EL1},
- {"dbgwcr14_el1", DBGWCR14_EL1},
- {"dbgwcr15_el1", DBGWCR15_EL1},
- {"teehbr32_el1", TEEHBR32_EL1},
- {"osdlr_el1", OSDLR_EL1},
- {"dbgprcr_el1", DBGPRCR_EL1},
- {"dbgclaimset_el1", DBGCLAIMSET_EL1},
- {"dbgclaimclr_el1", DBGCLAIMCLR_EL1},
- {"csselr_el1", CSSELR_EL1},
- {"vpidr_el2", VPIDR_EL2},
- {"vmpidr_el2", VMPIDR_EL2},
- {"sctlr_el1", SCTLR_EL1},
- {"sctlr_el2", SCTLR_EL2},
- {"sctlr_el3", SCTLR_EL3},
- {"actlr_el1", ACTLR_EL1},
- {"actlr_el2", ACTLR_EL2},
- {"actlr_el3", ACTLR_EL3},
- {"cpacr_el1", CPACR_EL1},
- {"hcr_el2", HCR_EL2},
- {"scr_el3", SCR_EL3},
- {"mdcr_el2", MDCR_EL2},
- {"sder32_el3", SDER32_EL3},
- {"cptr_el2", CPTR_EL2},
- {"cptr_el3", CPTR_EL3},
- {"hstr_el2", HSTR_EL2},
- {"hacr_el2", HACR_EL2},
- {"mdcr_el3", MDCR_EL3},
- {"ttbr0_el1", TTBR0_EL1},
- {"ttbr0_el2", TTBR0_EL2},
- {"ttbr0_el3", TTBR0_EL3},
- {"ttbr1_el1", TTBR1_EL1},
- {"tcr_el1", TCR_EL1},
- {"tcr_el2", TCR_EL2},
- {"tcr_el3", TCR_EL3},
- {"vttbr_el2", VTTBR_EL2},
- {"vtcr_el2", VTCR_EL2},
- {"dacr32_el2", DACR32_EL2},
- {"spsr_el1", SPSR_EL1},
- {"spsr_el2", SPSR_EL2},
- {"spsr_el3", SPSR_EL3},
- {"elr_el1", ELR_EL1},
- {"elr_el2", ELR_EL2},
- {"elr_el3", ELR_EL3},
- {"sp_el0", SP_EL0},
- {"sp_el1", SP_EL1},
- {"sp_el2", SP_EL2},
- {"spsel", SPSel},
- {"nzcv", NZCV},
- {"daif", DAIF},
- {"currentel", CurrentEL},
- {"spsr_irq", SPSR_irq},
- {"spsr_abt", SPSR_abt},
- {"spsr_und", SPSR_und},
- {"spsr_fiq", SPSR_fiq},
- {"fpcr", FPCR},
- {"fpsr", FPSR},
- {"dspsr_el0", DSPSR_EL0},
- {"dlr_el0", DLR_EL0},
- {"ifsr32_el2", IFSR32_EL2},
- {"afsr0_el1", AFSR0_EL1},
- {"afsr0_el2", AFSR0_EL2},
- {"afsr0_el3", AFSR0_EL3},
- {"afsr1_el1", AFSR1_EL1},
- {"afsr1_el2", AFSR1_EL2},
- {"afsr1_el3", AFSR1_EL3},
- {"esr_el1", ESR_EL1},
- {"esr_el2", ESR_EL2},
- {"esr_el3", ESR_EL3},
- {"fpexc32_el2", FPEXC32_EL2},
- {"far_el1", FAR_EL1},
- {"far_el2", FAR_EL2},
- {"far_el3", FAR_EL3},
- {"hpfar_el2", HPFAR_EL2},
- {"par_el1", PAR_EL1},
- {"pmcr_el0", PMCR_EL0},
- {"pmcntenset_el0", PMCNTENSET_EL0},
- {"pmcntenclr_el0", PMCNTENCLR_EL0},
- {"pmovsclr_el0", PMOVSCLR_EL0},
- {"pmselr_el0", PMSELR_EL0},
- {"pmccntr_el0", PMCCNTR_EL0},
- {"pmxevtyper_el0", PMXEVTYPER_EL0},
- {"pmxevcntr_el0", PMXEVCNTR_EL0},
- {"pmuserenr_el0", PMUSERENR_EL0},
- {"pmintenset_el1", PMINTENSET_EL1},
- {"pmintenclr_el1", PMINTENCLR_EL1},
- {"pmovsset_el0", PMOVSSET_EL0},
- {"mair_el1", MAIR_EL1},
- {"mair_el2", MAIR_EL2},
- {"mair_el3", MAIR_EL3},
- {"amair_el1", AMAIR_EL1},
- {"amair_el2", AMAIR_EL2},
- {"amair_el3", AMAIR_EL3},
- {"vbar_el1", VBAR_EL1},
- {"vbar_el2", VBAR_EL2},
- {"vbar_el3", VBAR_EL3},
- {"rmr_el1", RMR_EL1},
- {"rmr_el2", RMR_EL2},
- {"rmr_el3", RMR_EL3},
- {"contextidr_el1", CONTEXTIDR_EL1},
- {"tpidr_el0", TPIDR_EL0},
- {"tpidr_el2", TPIDR_EL2},
- {"tpidr_el3", TPIDR_EL3},
- {"tpidrro_el0", TPIDRRO_EL0},
- {"tpidr_el1", TPIDR_EL1},
- {"cntfrq_el0", CNTFRQ_EL0},
- {"cntvoff_el2", CNTVOFF_EL2},
- {"cntkctl_el1", CNTKCTL_EL1},
- {"cnthctl_el2", CNTHCTL_EL2},
- {"cntp_tval_el0", CNTP_TVAL_EL0},
- {"cnthp_tval_el2", CNTHP_TVAL_EL2},
- {"cntps_tval_el1", CNTPS_TVAL_EL1},
- {"cntp_ctl_el0", CNTP_CTL_EL0},
- {"cnthp_ctl_el2", CNTHP_CTL_EL2},
- {"cntps_ctl_el1", CNTPS_CTL_EL1},
- {"cntp_cval_el0", CNTP_CVAL_EL0},
- {"cnthp_cval_el2", CNTHP_CVAL_EL2},
- {"cntps_cval_el1", CNTPS_CVAL_EL1},
- {"cntv_tval_el0", CNTV_TVAL_EL0},
- {"cntv_ctl_el0", CNTV_CTL_EL0},
- {"cntv_cval_el0", CNTV_CVAL_EL0},
- {"pmevcntr0_el0", PMEVCNTR0_EL0},
- {"pmevcntr1_el0", PMEVCNTR1_EL0},
- {"pmevcntr2_el0", PMEVCNTR2_EL0},
- {"pmevcntr3_el0", PMEVCNTR3_EL0},
- {"pmevcntr4_el0", PMEVCNTR4_EL0},
- {"pmevcntr5_el0", PMEVCNTR5_EL0},
- {"pmevcntr6_el0", PMEVCNTR6_EL0},
- {"pmevcntr7_el0", PMEVCNTR7_EL0},
- {"pmevcntr8_el0", PMEVCNTR8_EL0},
- {"pmevcntr9_el0", PMEVCNTR9_EL0},
- {"pmevcntr10_el0", PMEVCNTR10_EL0},
- {"pmevcntr11_el0", PMEVCNTR11_EL0},
- {"pmevcntr12_el0", PMEVCNTR12_EL0},
- {"pmevcntr13_el0", PMEVCNTR13_EL0},
- {"pmevcntr14_el0", PMEVCNTR14_EL0},
- {"pmevcntr15_el0", PMEVCNTR15_EL0},
- {"pmevcntr16_el0", PMEVCNTR16_EL0},
- {"pmevcntr17_el0", PMEVCNTR17_EL0},
- {"pmevcntr18_el0", PMEVCNTR18_EL0},
- {"pmevcntr19_el0", PMEVCNTR19_EL0},
- {"pmevcntr20_el0", PMEVCNTR20_EL0},
- {"pmevcntr21_el0", PMEVCNTR21_EL0},
- {"pmevcntr22_el0", PMEVCNTR22_EL0},
- {"pmevcntr23_el0", PMEVCNTR23_EL0},
- {"pmevcntr24_el0", PMEVCNTR24_EL0},
- {"pmevcntr25_el0", PMEVCNTR25_EL0},
- {"pmevcntr26_el0", PMEVCNTR26_EL0},
- {"pmevcntr27_el0", PMEVCNTR27_EL0},
- {"pmevcntr28_el0", PMEVCNTR28_EL0},
- {"pmevcntr29_el0", PMEVCNTR29_EL0},
- {"pmevcntr30_el0", PMEVCNTR30_EL0},
- {"pmccfiltr_el0", PMCCFILTR_EL0},
- {"pmevtyper0_el0", PMEVTYPER0_EL0},
- {"pmevtyper1_el0", PMEVTYPER1_EL0},
- {"pmevtyper2_el0", PMEVTYPER2_EL0},
- {"pmevtyper3_el0", PMEVTYPER3_EL0},
- {"pmevtyper4_el0", PMEVTYPER4_EL0},
- {"pmevtyper5_el0", PMEVTYPER5_EL0},
- {"pmevtyper6_el0", PMEVTYPER6_EL0},
- {"pmevtyper7_el0", PMEVTYPER7_EL0},
- {"pmevtyper8_el0", PMEVTYPER8_EL0},
- {"pmevtyper9_el0", PMEVTYPER9_EL0},
- {"pmevtyper10_el0", PMEVTYPER10_EL0},
- {"pmevtyper11_el0", PMEVTYPER11_EL0},
- {"pmevtyper12_el0", PMEVTYPER12_EL0},
- {"pmevtyper13_el0", PMEVTYPER13_EL0},
- {"pmevtyper14_el0", PMEVTYPER14_EL0},
- {"pmevtyper15_el0", PMEVTYPER15_EL0},
- {"pmevtyper16_el0", PMEVTYPER16_EL0},
- {"pmevtyper17_el0", PMEVTYPER17_EL0},
- {"pmevtyper18_el0", PMEVTYPER18_EL0},
- {"pmevtyper19_el0", PMEVTYPER19_EL0},
- {"pmevtyper20_el0", PMEVTYPER20_EL0},
- {"pmevtyper21_el0", PMEVTYPER21_EL0},
- {"pmevtyper22_el0", PMEVTYPER22_EL0},
- {"pmevtyper23_el0", PMEVTYPER23_EL0},
- {"pmevtyper24_el0", PMEVTYPER24_EL0},
- {"pmevtyper25_el0", PMEVTYPER25_EL0},
- {"pmevtyper26_el0", PMEVTYPER26_EL0},
- {"pmevtyper27_el0", PMEVTYPER27_EL0},
- {"pmevtyper28_el0", PMEVTYPER28_EL0},
- {"pmevtyper29_el0", PMEVTYPER29_EL0},
- {"pmevtyper30_el0", PMEVTYPER30_EL0},
+ {"osdtrrx_el1", OSDTRRX_EL1, 0},
+ {"osdtrtx_el1", OSDTRTX_EL1, 0},
+ {"teecr32_el1", TEECR32_EL1, 0},
+ {"mdccint_el1", MDCCINT_EL1, 0},
+ {"mdscr_el1", MDSCR_EL1, 0},
+ {"dbgdtr_el0", DBGDTR_EL0, 0},
+ {"oseccr_el1", OSECCR_EL1, 0},
+ {"dbgvcr32_el2", DBGVCR32_EL2, 0},
+ {"dbgbvr0_el1", DBGBVR0_EL1, 0},
+ {"dbgbvr1_el1", DBGBVR1_EL1, 0},
+ {"dbgbvr2_el1", DBGBVR2_EL1, 0},
+ {"dbgbvr3_el1", DBGBVR3_EL1, 0},
+ {"dbgbvr4_el1", DBGBVR4_EL1, 0},
+ {"dbgbvr5_el1", DBGBVR5_EL1, 0},
+ {"dbgbvr6_el1", DBGBVR6_EL1, 0},
+ {"dbgbvr7_el1", DBGBVR7_EL1, 0},
+ {"dbgbvr8_el1", DBGBVR8_EL1, 0},
+ {"dbgbvr9_el1", DBGBVR9_EL1, 0},
+ {"dbgbvr10_el1", DBGBVR10_EL1, 0},
+ {"dbgbvr11_el1", DBGBVR11_EL1, 0},
+ {"dbgbvr12_el1", DBGBVR12_EL1, 0},
+ {"dbgbvr13_el1", DBGBVR13_EL1, 0},
+ {"dbgbvr14_el1", DBGBVR14_EL1, 0},
+ {"dbgbvr15_el1", DBGBVR15_EL1, 0},
+ {"dbgbcr0_el1", DBGBCR0_EL1, 0},
+ {"dbgbcr1_el1", DBGBCR1_EL1, 0},
+ {"dbgbcr2_el1", DBGBCR2_EL1, 0},
+ {"dbgbcr3_el1", DBGBCR3_EL1, 0},
+ {"dbgbcr4_el1", DBGBCR4_EL1, 0},
+ {"dbgbcr5_el1", DBGBCR5_EL1, 0},
+ {"dbgbcr6_el1", DBGBCR6_EL1, 0},
+ {"dbgbcr7_el1", DBGBCR7_EL1, 0},
+ {"dbgbcr8_el1", DBGBCR8_EL1, 0},
+ {"dbgbcr9_el1", DBGBCR9_EL1, 0},
+ {"dbgbcr10_el1", DBGBCR10_EL1, 0},
+ {"dbgbcr11_el1", DBGBCR11_EL1, 0},
+ {"dbgbcr12_el1", DBGBCR12_EL1, 0},
+ {"dbgbcr13_el1", DBGBCR13_EL1, 0},
+ {"dbgbcr14_el1", DBGBCR14_EL1, 0},
+ {"dbgbcr15_el1", DBGBCR15_EL1, 0},
+ {"dbgwvr0_el1", DBGWVR0_EL1, 0},
+ {"dbgwvr1_el1", DBGWVR1_EL1, 0},
+ {"dbgwvr2_el1", DBGWVR2_EL1, 0},
+ {"dbgwvr3_el1", DBGWVR3_EL1, 0},
+ {"dbgwvr4_el1", DBGWVR4_EL1, 0},
+ {"dbgwvr5_el1", DBGWVR5_EL1, 0},
+ {"dbgwvr6_el1", DBGWVR6_EL1, 0},
+ {"dbgwvr7_el1", DBGWVR7_EL1, 0},
+ {"dbgwvr8_el1", DBGWVR8_EL1, 0},
+ {"dbgwvr9_el1", DBGWVR9_EL1, 0},
+ {"dbgwvr10_el1", DBGWVR10_EL1, 0},
+ {"dbgwvr11_el1", DBGWVR11_EL1, 0},
+ {"dbgwvr12_el1", DBGWVR12_EL1, 0},
+ {"dbgwvr13_el1", DBGWVR13_EL1, 0},
+ {"dbgwvr14_el1", DBGWVR14_EL1, 0},
+ {"dbgwvr15_el1", DBGWVR15_EL1, 0},
+ {"dbgwcr0_el1", DBGWCR0_EL1, 0},
+ {"dbgwcr1_el1", DBGWCR1_EL1, 0},
+ {"dbgwcr2_el1", DBGWCR2_EL1, 0},
+ {"dbgwcr3_el1", DBGWCR3_EL1, 0},
+ {"dbgwcr4_el1", DBGWCR4_EL1, 0},
+ {"dbgwcr5_el1", DBGWCR5_EL1, 0},
+ {"dbgwcr6_el1", DBGWCR6_EL1, 0},
+ {"dbgwcr7_el1", DBGWCR7_EL1, 0},
+ {"dbgwcr8_el1", DBGWCR8_EL1, 0},
+ {"dbgwcr9_el1", DBGWCR9_EL1, 0},
+ {"dbgwcr10_el1", DBGWCR10_EL1, 0},
+ {"dbgwcr11_el1", DBGWCR11_EL1, 0},
+ {"dbgwcr12_el1", DBGWCR12_EL1, 0},
+ {"dbgwcr13_el1", DBGWCR13_EL1, 0},
+ {"dbgwcr14_el1", DBGWCR14_EL1, 0},
+ {"dbgwcr15_el1", DBGWCR15_EL1, 0},
+ {"teehbr32_el1", TEEHBR32_EL1, 0},
+ {"osdlr_el1", OSDLR_EL1, 0},
+ {"dbgprcr_el1", DBGPRCR_EL1, 0},
+ {"dbgclaimset_el1", DBGCLAIMSET_EL1, 0},
+ {"dbgclaimclr_el1", DBGCLAIMCLR_EL1, 0},
+ {"csselr_el1", CSSELR_EL1, 0},
+ {"vpidr_el2", VPIDR_EL2, 0},
+ {"vmpidr_el2", VMPIDR_EL2, 0},
+ {"sctlr_el1", SCTLR_EL1, 0},
+ {"sctlr_el2", SCTLR_EL2, 0},
+ {"sctlr_el3", SCTLR_EL3, 0},
+ {"actlr_el1", ACTLR_EL1, 0},
+ {"actlr_el2", ACTLR_EL2, 0},
+ {"actlr_el3", ACTLR_EL3, 0},
+ {"cpacr_el1", CPACR_EL1, 0},
+ {"hcr_el2", HCR_EL2, 0},
+ {"scr_el3", SCR_EL3, 0},
+ {"mdcr_el2", MDCR_EL2, 0},
+ {"sder32_el3", SDER32_EL3, 0},
+ {"cptr_el2", CPTR_EL2, 0},
+ {"cptr_el3", CPTR_EL3, 0},
+ {"hstr_el2", HSTR_EL2, 0},
+ {"hacr_el2", HACR_EL2, 0},
+ {"mdcr_el3", MDCR_EL3, 0},
+ {"ttbr0_el1", TTBR0_EL1, 0},
+ {"ttbr0_el2", TTBR0_EL2, 0},
+ {"ttbr0_el3", TTBR0_EL3, 0},
+ {"ttbr1_el1", TTBR1_EL1, 0},
+ {"tcr_el1", TCR_EL1, 0},
+ {"tcr_el2", TCR_EL2, 0},
+ {"tcr_el3", TCR_EL3, 0},
+ {"vttbr_el2", VTTBR_EL2, 0},
+ {"vtcr_el2", VTCR_EL2, 0},
+ {"dacr32_el2", DACR32_EL2, 0},
+ {"spsr_el1", SPSR_EL1, 0},
+ {"spsr_el2", SPSR_EL2, 0},
+ {"spsr_el3", SPSR_EL3, 0},
+ {"elr_el1", ELR_EL1, 0},
+ {"elr_el2", ELR_EL2, 0},
+ {"elr_el3", ELR_EL3, 0},
+ {"sp_el0", SP_EL0, 0},
+ {"sp_el1", SP_EL1, 0},
+ {"sp_el2", SP_EL2, 0},
+ {"spsel", SPSel, 0},
+ {"nzcv", NZCV, 0},
+ {"daif", DAIF, 0},
+ {"currentel", CurrentEL, 0},
+ {"spsr_irq", SPSR_irq, 0},
+ {"spsr_abt", SPSR_abt, 0},
+ {"spsr_und", SPSR_und, 0},
+ {"spsr_fiq", SPSR_fiq, 0},
+ {"fpcr", FPCR, 0},
+ {"fpsr", FPSR, 0},
+ {"dspsr_el0", DSPSR_EL0, 0},
+ {"dlr_el0", DLR_EL0, 0},
+ {"ifsr32_el2", IFSR32_EL2, 0},
+ {"afsr0_el1", AFSR0_EL1, 0},
+ {"afsr0_el2", AFSR0_EL2, 0},
+ {"afsr0_el3", AFSR0_EL3, 0},
+ {"afsr1_el1", AFSR1_EL1, 0},
+ {"afsr1_el2", AFSR1_EL2, 0},
+ {"afsr1_el3", AFSR1_EL3, 0},
+ {"esr_el1", ESR_EL1, 0},
+ {"esr_el2", ESR_EL2, 0},
+ {"esr_el3", ESR_EL3, 0},
+ {"fpexc32_el2", FPEXC32_EL2, 0},
+ {"far_el1", FAR_EL1, 0},
+ {"far_el2", FAR_EL2, 0},
+ {"far_el3", FAR_EL3, 0},
+ {"hpfar_el2", HPFAR_EL2, 0},
+ {"par_el1", PAR_EL1, 0},
+ {"pmcr_el0", PMCR_EL0, 0},
+ {"pmcntenset_el0", PMCNTENSET_EL0, 0},
+ {"pmcntenclr_el0", PMCNTENCLR_EL0, 0},
+ {"pmovsclr_el0", PMOVSCLR_EL0, 0},
+ {"pmselr_el0", PMSELR_EL0, 0},
+ {"pmccntr_el0", PMCCNTR_EL0, 0},
+ {"pmxevtyper_el0", PMXEVTYPER_EL0, 0},
+ {"pmxevcntr_el0", PMXEVCNTR_EL0, 0},
+ {"pmuserenr_el0", PMUSERENR_EL0, 0},
+ {"pmintenset_el1", PMINTENSET_EL1, 0},
+ {"pmintenclr_el1", PMINTENCLR_EL1, 0},
+ {"pmovsset_el0", PMOVSSET_EL0, 0},
+ {"mair_el1", MAIR_EL1, 0},
+ {"mair_el2", MAIR_EL2, 0},
+ {"mair_el3", MAIR_EL3, 0},
+ {"amair_el1", AMAIR_EL1, 0},
+ {"amair_el2", AMAIR_EL2, 0},
+ {"amair_el3", AMAIR_EL3, 0},
+ {"vbar_el1", VBAR_EL1, 0},
+ {"vbar_el2", VBAR_EL2, 0},
+ {"vbar_el3", VBAR_EL3, 0},
+ {"rmr_el1", RMR_EL1, 0},
+ {"rmr_el2", RMR_EL2, 0},
+ {"rmr_el3", RMR_EL3, 0},
+ {"contextidr_el1", CONTEXTIDR_EL1, 0},
+ {"tpidr_el0", TPIDR_EL0, 0},
+ {"tpidr_el2", TPIDR_EL2, 0},
+ {"tpidr_el3", TPIDR_EL3, 0},
+ {"tpidrro_el0", TPIDRRO_EL0, 0},
+ {"tpidr_el1", TPIDR_EL1, 0},
+ {"cntfrq_el0", CNTFRQ_EL0, 0},
+ {"cntvoff_el2", CNTVOFF_EL2, 0},
+ {"cntkctl_el1", CNTKCTL_EL1, 0},
+ {"cnthctl_el2", CNTHCTL_EL2, 0},
+ {"cntp_tval_el0", CNTP_TVAL_EL0, 0},
+ {"cnthp_tval_el2", CNTHP_TVAL_EL2, 0},
+ {"cntps_tval_el1", CNTPS_TVAL_EL1, 0},
+ {"cntp_ctl_el0", CNTP_CTL_EL0, 0},
+ {"cnthp_ctl_el2", CNTHP_CTL_EL2, 0},
+ {"cntps_ctl_el1", CNTPS_CTL_EL1, 0},
+ {"cntp_cval_el0", CNTP_CVAL_EL0, 0},
+ {"cnthp_cval_el2", CNTHP_CVAL_EL2, 0},
+ {"cntps_cval_el1", CNTPS_CVAL_EL1, 0},
+ {"cntv_tval_el0", CNTV_TVAL_EL0, 0},
+ {"cntv_ctl_el0", CNTV_CTL_EL0, 0},
+ {"cntv_cval_el0", CNTV_CVAL_EL0, 0},
+ {"pmevcntr0_el0", PMEVCNTR0_EL0, 0},
+ {"pmevcntr1_el0", PMEVCNTR1_EL0, 0},
+ {"pmevcntr2_el0", PMEVCNTR2_EL0, 0},
+ {"pmevcntr3_el0", PMEVCNTR3_EL0, 0},
+ {"pmevcntr4_el0", PMEVCNTR4_EL0, 0},
+ {"pmevcntr5_el0", PMEVCNTR5_EL0, 0},
+ {"pmevcntr6_el0", PMEVCNTR6_EL0, 0},
+ {"pmevcntr7_el0", PMEVCNTR7_EL0, 0},
+ {"pmevcntr8_el0", PMEVCNTR8_EL0, 0},
+ {"pmevcntr9_el0", PMEVCNTR9_EL0, 0},
+ {"pmevcntr10_el0", PMEVCNTR10_EL0, 0},
+ {"pmevcntr11_el0", PMEVCNTR11_EL0, 0},
+ {"pmevcntr12_el0", PMEVCNTR12_EL0, 0},
+ {"pmevcntr13_el0", PMEVCNTR13_EL0, 0},
+ {"pmevcntr14_el0", PMEVCNTR14_EL0, 0},
+ {"pmevcntr15_el0", PMEVCNTR15_EL0, 0},
+ {"pmevcntr16_el0", PMEVCNTR16_EL0, 0},
+ {"pmevcntr17_el0", PMEVCNTR17_EL0, 0},
+ {"pmevcntr18_el0", PMEVCNTR18_EL0, 0},
+ {"pmevcntr19_el0", PMEVCNTR19_EL0, 0},
+ {"pmevcntr20_el0", PMEVCNTR20_EL0, 0},
+ {"pmevcntr21_el0", PMEVCNTR21_EL0, 0},
+ {"pmevcntr22_el0", PMEVCNTR22_EL0, 0},
+ {"pmevcntr23_el0", PMEVCNTR23_EL0, 0},
+ {"pmevcntr24_el0", PMEVCNTR24_EL0, 0},
+ {"pmevcntr25_el0", PMEVCNTR25_EL0, 0},
+ {"pmevcntr26_el0", PMEVCNTR26_EL0, 0},
+ {"pmevcntr27_el0", PMEVCNTR27_EL0, 0},
+ {"pmevcntr28_el0", PMEVCNTR28_EL0, 0},
+ {"pmevcntr29_el0", PMEVCNTR29_EL0, 0},
+ {"pmevcntr30_el0", PMEVCNTR30_EL0, 0},
+ {"pmccfiltr_el0", PMCCFILTR_EL0, 0},
+ {"pmevtyper0_el0", PMEVTYPER0_EL0, 0},
+ {"pmevtyper1_el0", PMEVTYPER1_EL0, 0},
+ {"pmevtyper2_el0", PMEVTYPER2_EL0, 0},
+ {"pmevtyper3_el0", PMEVTYPER3_EL0, 0},
+ {"pmevtyper4_el0", PMEVTYPER4_EL0, 0},
+ {"pmevtyper5_el0", PMEVTYPER5_EL0, 0},
+ {"pmevtyper6_el0", PMEVTYPER6_EL0, 0},
+ {"pmevtyper7_el0", PMEVTYPER7_EL0, 0},
+ {"pmevtyper8_el0", PMEVTYPER8_EL0, 0},
+ {"pmevtyper9_el0", PMEVTYPER9_EL0, 0},
+ {"pmevtyper10_el0", PMEVTYPER10_EL0, 0},
+ {"pmevtyper11_el0", PMEVTYPER11_EL0, 0},
+ {"pmevtyper12_el0", PMEVTYPER12_EL0, 0},
+ {"pmevtyper13_el0", PMEVTYPER13_EL0, 0},
+ {"pmevtyper14_el0", PMEVTYPER14_EL0, 0},
+ {"pmevtyper15_el0", PMEVTYPER15_EL0, 0},
+ {"pmevtyper16_el0", PMEVTYPER16_EL0, 0},
+ {"pmevtyper17_el0", PMEVTYPER17_EL0, 0},
+ {"pmevtyper18_el0", PMEVTYPER18_EL0, 0},
+ {"pmevtyper19_el0", PMEVTYPER19_EL0, 0},
+ {"pmevtyper20_el0", PMEVTYPER20_EL0, 0},
+ {"pmevtyper21_el0", PMEVTYPER21_EL0, 0},
+ {"pmevtyper22_el0", PMEVTYPER22_EL0, 0},
+ {"pmevtyper23_el0", PMEVTYPER23_EL0, 0},
+ {"pmevtyper24_el0", PMEVTYPER24_EL0, 0},
+ {"pmevtyper25_el0", PMEVTYPER25_EL0, 0},
+ {"pmevtyper26_el0", PMEVTYPER26_EL0, 0},
+ {"pmevtyper27_el0", PMEVTYPER27_EL0, 0},
+ {"pmevtyper28_el0", PMEVTYPER28_EL0, 0},
+ {"pmevtyper29_el0", PMEVTYPER29_EL0, 0},
+ {"pmevtyper30_el0", PMEVTYPER30_EL0, 0},
// Trace registers
- {"trcprgctlr", TRCPRGCTLR},
- {"trcprocselr", TRCPROCSELR},
- {"trcconfigr", TRCCONFIGR},
- {"trcauxctlr", TRCAUXCTLR},
- {"trceventctl0r", TRCEVENTCTL0R},
- {"trceventctl1r", TRCEVENTCTL1R},
- {"trcstallctlr", TRCSTALLCTLR},
- {"trctsctlr", TRCTSCTLR},
- {"trcsyncpr", TRCSYNCPR},
- {"trcccctlr", TRCCCCTLR},
- {"trcbbctlr", TRCBBCTLR},
- {"trctraceidr", TRCTRACEIDR},
- {"trcqctlr", TRCQCTLR},
- {"trcvictlr", TRCVICTLR},
- {"trcviiectlr", TRCVIIECTLR},
- {"trcvissctlr", TRCVISSCTLR},
- {"trcvipcssctlr", TRCVIPCSSCTLR},
- {"trcvdctlr", TRCVDCTLR},
- {"trcvdsacctlr", TRCVDSACCTLR},
- {"trcvdarcctlr", TRCVDARCCTLR},
- {"trcseqevr0", TRCSEQEVR0},
- {"trcseqevr1", TRCSEQEVR1},
- {"trcseqevr2", TRCSEQEVR2},
- {"trcseqrstevr", TRCSEQRSTEVR},
- {"trcseqstr", TRCSEQSTR},
- {"trcextinselr", TRCEXTINSELR},
- {"trccntrldvr0", TRCCNTRLDVR0},
- {"trccntrldvr1", TRCCNTRLDVR1},
- {"trccntrldvr2", TRCCNTRLDVR2},
- {"trccntrldvr3", TRCCNTRLDVR3},
- {"trccntctlr0", TRCCNTCTLR0},
- {"trccntctlr1", TRCCNTCTLR1},
- {"trccntctlr2", TRCCNTCTLR2},
- {"trccntctlr3", TRCCNTCTLR3},
- {"trccntvr0", TRCCNTVR0},
- {"trccntvr1", TRCCNTVR1},
- {"trccntvr2", TRCCNTVR2},
- {"trccntvr3", TRCCNTVR3},
- {"trcimspec0", TRCIMSPEC0},
- {"trcimspec1", TRCIMSPEC1},
- {"trcimspec2", TRCIMSPEC2},
- {"trcimspec3", TRCIMSPEC3},
- {"trcimspec4", TRCIMSPEC4},
- {"trcimspec5", TRCIMSPEC5},
- {"trcimspec6", TRCIMSPEC6},
- {"trcimspec7", TRCIMSPEC7},
- {"trcrsctlr2", TRCRSCTLR2},
- {"trcrsctlr3", TRCRSCTLR3},
- {"trcrsctlr4", TRCRSCTLR4},
- {"trcrsctlr5", TRCRSCTLR5},
- {"trcrsctlr6", TRCRSCTLR6},
- {"trcrsctlr7", TRCRSCTLR7},
- {"trcrsctlr8", TRCRSCTLR8},
- {"trcrsctlr9", TRCRSCTLR9},
- {"trcrsctlr10", TRCRSCTLR10},
- {"trcrsctlr11", TRCRSCTLR11},
- {"trcrsctlr12", TRCRSCTLR12},
- {"trcrsctlr13", TRCRSCTLR13},
- {"trcrsctlr14", TRCRSCTLR14},
- {"trcrsctlr15", TRCRSCTLR15},
- {"trcrsctlr16", TRCRSCTLR16},
- {"trcrsctlr17", TRCRSCTLR17},
- {"trcrsctlr18", TRCRSCTLR18},
- {"trcrsctlr19", TRCRSCTLR19},
- {"trcrsctlr20", TRCRSCTLR20},
- {"trcrsctlr21", TRCRSCTLR21},
- {"trcrsctlr22", TRCRSCTLR22},
- {"trcrsctlr23", TRCRSCTLR23},
- {"trcrsctlr24", TRCRSCTLR24},
- {"trcrsctlr25", TRCRSCTLR25},
- {"trcrsctlr26", TRCRSCTLR26},
- {"trcrsctlr27", TRCRSCTLR27},
- {"trcrsctlr28", TRCRSCTLR28},
- {"trcrsctlr29", TRCRSCTLR29},
- {"trcrsctlr30", TRCRSCTLR30},
- {"trcrsctlr31", TRCRSCTLR31},
- {"trcssccr0", TRCSSCCR0},
- {"trcssccr1", TRCSSCCR1},
- {"trcssccr2", TRCSSCCR2},
- {"trcssccr3", TRCSSCCR3},
- {"trcssccr4", TRCSSCCR4},
- {"trcssccr5", TRCSSCCR5},
- {"trcssccr6", TRCSSCCR6},
- {"trcssccr7", TRCSSCCR7},
- {"trcsscsr0", TRCSSCSR0},
- {"trcsscsr1", TRCSSCSR1},
- {"trcsscsr2", TRCSSCSR2},
- {"trcsscsr3", TRCSSCSR3},
- {"trcsscsr4", TRCSSCSR4},
- {"trcsscsr5", TRCSSCSR5},
- {"trcsscsr6", TRCSSCSR6},
- {"trcsscsr7", TRCSSCSR7},
- {"trcsspcicr0", TRCSSPCICR0},
- {"trcsspcicr1", TRCSSPCICR1},
- {"trcsspcicr2", TRCSSPCICR2},
- {"trcsspcicr3", TRCSSPCICR3},
- {"trcsspcicr4", TRCSSPCICR4},
- {"trcsspcicr5", TRCSSPCICR5},
- {"trcsspcicr6", TRCSSPCICR6},
- {"trcsspcicr7", TRCSSPCICR7},
- {"trcpdcr", TRCPDCR},
- {"trcacvr0", TRCACVR0},
- {"trcacvr1", TRCACVR1},
- {"trcacvr2", TRCACVR2},
- {"trcacvr3", TRCACVR3},
- {"trcacvr4", TRCACVR4},
- {"trcacvr5", TRCACVR5},
- {"trcacvr6", TRCACVR6},
- {"trcacvr7", TRCACVR7},
- {"trcacvr8", TRCACVR8},
- {"trcacvr9", TRCACVR9},
- {"trcacvr10", TRCACVR10},
- {"trcacvr11", TRCACVR11},
- {"trcacvr12", TRCACVR12},
- {"trcacvr13", TRCACVR13},
- {"trcacvr14", TRCACVR14},
- {"trcacvr15", TRCACVR15},
- {"trcacatr0", TRCACATR0},
- {"trcacatr1", TRCACATR1},
- {"trcacatr2", TRCACATR2},
- {"trcacatr3", TRCACATR3},
- {"trcacatr4", TRCACATR4},
- {"trcacatr5", TRCACATR5},
- {"trcacatr6", TRCACATR6},
- {"trcacatr7", TRCACATR7},
- {"trcacatr8", TRCACATR8},
- {"trcacatr9", TRCACATR9},
- {"trcacatr10", TRCACATR10},
- {"trcacatr11", TRCACATR11},
- {"trcacatr12", TRCACATR12},
- {"trcacatr13", TRCACATR13},
- {"trcacatr14", TRCACATR14},
- {"trcacatr15", TRCACATR15},
- {"trcdvcvr0", TRCDVCVR0},
- {"trcdvcvr1", TRCDVCVR1},
- {"trcdvcvr2", TRCDVCVR2},
- {"trcdvcvr3", TRCDVCVR3},
- {"trcdvcvr4", TRCDVCVR4},
- {"trcdvcvr5", TRCDVCVR5},
- {"trcdvcvr6", TRCDVCVR6},
- {"trcdvcvr7", TRCDVCVR7},
- {"trcdvcmr0", TRCDVCMR0},
- {"trcdvcmr1", TRCDVCMR1},
- {"trcdvcmr2", TRCDVCMR2},
- {"trcdvcmr3", TRCDVCMR3},
- {"trcdvcmr4", TRCDVCMR4},
- {"trcdvcmr5", TRCDVCMR5},
- {"trcdvcmr6", TRCDVCMR6},
- {"trcdvcmr7", TRCDVCMR7},
- {"trccidcvr0", TRCCIDCVR0},
- {"trccidcvr1", TRCCIDCVR1},
- {"trccidcvr2", TRCCIDCVR2},
- {"trccidcvr3", TRCCIDCVR3},
- {"trccidcvr4", TRCCIDCVR4},
- {"trccidcvr5", TRCCIDCVR5},
- {"trccidcvr6", TRCCIDCVR6},
- {"trccidcvr7", TRCCIDCVR7},
- {"trcvmidcvr0", TRCVMIDCVR0},
- {"trcvmidcvr1", TRCVMIDCVR1},
- {"trcvmidcvr2", TRCVMIDCVR2},
- {"trcvmidcvr3", TRCVMIDCVR3},
- {"trcvmidcvr4", TRCVMIDCVR4},
- {"trcvmidcvr5", TRCVMIDCVR5},
- {"trcvmidcvr6", TRCVMIDCVR6},
- {"trcvmidcvr7", TRCVMIDCVR7},
- {"trccidcctlr0", TRCCIDCCTLR0},
- {"trccidcctlr1", TRCCIDCCTLR1},
- {"trcvmidcctlr0", TRCVMIDCCTLR0},
- {"trcvmidcctlr1", TRCVMIDCCTLR1},
- {"trcitctrl", TRCITCTRL},
- {"trcclaimset", TRCCLAIMSET},
- {"trcclaimclr", TRCCLAIMCLR},
+ {"trcprgctlr", TRCPRGCTLR, 0},
+ {"trcprocselr", TRCPROCSELR, 0},
+ {"trcconfigr", TRCCONFIGR, 0},
+ {"trcauxctlr", TRCAUXCTLR, 0},
+ {"trceventctl0r", TRCEVENTCTL0R, 0},
+ {"trceventctl1r", TRCEVENTCTL1R, 0},
+ {"trcstallctlr", TRCSTALLCTLR, 0},
+ {"trctsctlr", TRCTSCTLR, 0},
+ {"trcsyncpr", TRCSYNCPR, 0},
+ {"trcccctlr", TRCCCCTLR, 0},
+ {"trcbbctlr", TRCBBCTLR, 0},
+ {"trctraceidr", TRCTRACEIDR, 0},
+ {"trcqctlr", TRCQCTLR, 0},
+ {"trcvictlr", TRCVICTLR, 0},
+ {"trcviiectlr", TRCVIIECTLR, 0},
+ {"trcvissctlr", TRCVISSCTLR, 0},
+ {"trcvipcssctlr", TRCVIPCSSCTLR, 0},
+ {"trcvdctlr", TRCVDCTLR, 0},
+ {"trcvdsacctlr", TRCVDSACCTLR, 0},
+ {"trcvdarcctlr", TRCVDARCCTLR, 0},
+ {"trcseqevr0", TRCSEQEVR0, 0},
+ {"trcseqevr1", TRCSEQEVR1, 0},
+ {"trcseqevr2", TRCSEQEVR2, 0},
+ {"trcseqrstevr", TRCSEQRSTEVR, 0},
+ {"trcseqstr", TRCSEQSTR, 0},
+ {"trcextinselr", TRCEXTINSELR, 0},
+ {"trccntrldvr0", TRCCNTRLDVR0, 0},
+ {"trccntrldvr1", TRCCNTRLDVR1, 0},
+ {"trccntrldvr2", TRCCNTRLDVR2, 0},
+ {"trccntrldvr3", TRCCNTRLDVR3, 0},
+ {"trccntctlr0", TRCCNTCTLR0, 0},
+ {"trccntctlr1", TRCCNTCTLR1, 0},
+ {"trccntctlr2", TRCCNTCTLR2, 0},
+ {"trccntctlr3", TRCCNTCTLR3, 0},
+ {"trccntvr0", TRCCNTVR0, 0},
+ {"trccntvr1", TRCCNTVR1, 0},
+ {"trccntvr2", TRCCNTVR2, 0},
+ {"trccntvr3", TRCCNTVR3, 0},
+ {"trcimspec0", TRCIMSPEC0, 0},
+ {"trcimspec1", TRCIMSPEC1, 0},
+ {"trcimspec2", TRCIMSPEC2, 0},
+ {"trcimspec3", TRCIMSPEC3, 0},
+ {"trcimspec4", TRCIMSPEC4, 0},
+ {"trcimspec5", TRCIMSPEC5, 0},
+ {"trcimspec6", TRCIMSPEC6, 0},
+ {"trcimspec7", TRCIMSPEC7, 0},
+ {"trcrsctlr2", TRCRSCTLR2, 0},
+ {"trcrsctlr3", TRCRSCTLR3, 0},
+ {"trcrsctlr4", TRCRSCTLR4, 0},
+ {"trcrsctlr5", TRCRSCTLR5, 0},
+ {"trcrsctlr6", TRCRSCTLR6, 0},
+ {"trcrsctlr7", TRCRSCTLR7, 0},
+ {"trcrsctlr8", TRCRSCTLR8, 0},
+ {"trcrsctlr9", TRCRSCTLR9, 0},
+ {"trcrsctlr10", TRCRSCTLR10, 0},
+ {"trcrsctlr11", TRCRSCTLR11, 0},
+ {"trcrsctlr12", TRCRSCTLR12, 0},
+ {"trcrsctlr13", TRCRSCTLR13, 0},
+ {"trcrsctlr14", TRCRSCTLR14, 0},
+ {"trcrsctlr15", TRCRSCTLR15, 0},
+ {"trcrsctlr16", TRCRSCTLR16, 0},
+ {"trcrsctlr17", TRCRSCTLR17, 0},
+ {"trcrsctlr18", TRCRSCTLR18, 0},
+ {"trcrsctlr19", TRCRSCTLR19, 0},
+ {"trcrsctlr20", TRCRSCTLR20, 0},
+ {"trcrsctlr21", TRCRSCTLR21, 0},
+ {"trcrsctlr22", TRCRSCTLR22, 0},
+ {"trcrsctlr23", TRCRSCTLR23, 0},
+ {"trcrsctlr24", TRCRSCTLR24, 0},
+ {"trcrsctlr25", TRCRSCTLR25, 0},
+ {"trcrsctlr26", TRCRSCTLR26, 0},
+ {"trcrsctlr27", TRCRSCTLR27, 0},
+ {"trcrsctlr28", TRCRSCTLR28, 0},
+ {"trcrsctlr29", TRCRSCTLR29, 0},
+ {"trcrsctlr30", TRCRSCTLR30, 0},
+ {"trcrsctlr31", TRCRSCTLR31, 0},
+ {"trcssccr0", TRCSSCCR0, 0},
+ {"trcssccr1", TRCSSCCR1, 0},
+ {"trcssccr2", TRCSSCCR2, 0},
+ {"trcssccr3", TRCSSCCR3, 0},
+ {"trcssccr4", TRCSSCCR4, 0},
+ {"trcssccr5", TRCSSCCR5, 0},
+ {"trcssccr6", TRCSSCCR6, 0},
+ {"trcssccr7", TRCSSCCR7, 0},
+ {"trcsscsr0", TRCSSCSR0, 0},
+ {"trcsscsr1", TRCSSCSR1, 0},
+ {"trcsscsr2", TRCSSCSR2, 0},
+ {"trcsscsr3", TRCSSCSR3, 0},
+ {"trcsscsr4", TRCSSCSR4, 0},
+ {"trcsscsr5", TRCSSCSR5, 0},
+ {"trcsscsr6", TRCSSCSR6, 0},
+ {"trcsscsr7", TRCSSCSR7, 0},
+ {"trcsspcicr0", TRCSSPCICR0, 0},
+ {"trcsspcicr1", TRCSSPCICR1, 0},
+ {"trcsspcicr2", TRCSSPCICR2, 0},
+ {"trcsspcicr3", TRCSSPCICR3, 0},
+ {"trcsspcicr4", TRCSSPCICR4, 0},
+ {"trcsspcicr5", TRCSSPCICR5, 0},
+ {"trcsspcicr6", TRCSSPCICR6, 0},
+ {"trcsspcicr7", TRCSSPCICR7, 0},
+ {"trcpdcr", TRCPDCR, 0},
+ {"trcacvr0", TRCACVR0, 0},
+ {"trcacvr1", TRCACVR1, 0},
+ {"trcacvr2", TRCACVR2, 0},
+ {"trcacvr3", TRCACVR3, 0},
+ {"trcacvr4", TRCACVR4, 0},
+ {"trcacvr5", TRCACVR5, 0},
+ {"trcacvr6", TRCACVR6, 0},
+ {"trcacvr7", TRCACVR7, 0},
+ {"trcacvr8", TRCACVR8, 0},
+ {"trcacvr9", TRCACVR9, 0},
+ {"trcacvr10", TRCACVR10, 0},
+ {"trcacvr11", TRCACVR11, 0},
+ {"trcacvr12", TRCACVR12, 0},
+ {"trcacvr13", TRCACVR13, 0},
+ {"trcacvr14", TRCACVR14, 0},
+ {"trcacvr15", TRCACVR15, 0},
+ {"trcacatr0", TRCACATR0, 0},
+ {"trcacatr1", TRCACATR1, 0},
+ {"trcacatr2", TRCACATR2, 0},
+ {"trcacatr3", TRCACATR3, 0},
+ {"trcacatr4", TRCACATR4, 0},
+ {"trcacatr5", TRCACATR5, 0},
+ {"trcacatr6", TRCACATR6, 0},
+ {"trcacatr7", TRCACATR7, 0},
+ {"trcacatr8", TRCACATR8, 0},
+ {"trcacatr9", TRCACATR9, 0},
+ {"trcacatr10", TRCACATR10, 0},
+ {"trcacatr11", TRCACATR11, 0},
+ {"trcacatr12", TRCACATR12, 0},
+ {"trcacatr13", TRCACATR13, 0},
+ {"trcacatr14", TRCACATR14, 0},
+ {"trcacatr15", TRCACATR15, 0},
+ {"trcdvcvr0", TRCDVCVR0, 0},
+ {"trcdvcvr1", TRCDVCVR1, 0},
+ {"trcdvcvr2", TRCDVCVR2, 0},
+ {"trcdvcvr3", TRCDVCVR3, 0},
+ {"trcdvcvr4", TRCDVCVR4, 0},
+ {"trcdvcvr5", TRCDVCVR5, 0},
+ {"trcdvcvr6", TRCDVCVR6, 0},
+ {"trcdvcvr7", TRCDVCVR7, 0},
+ {"trcdvcmr0", TRCDVCMR0, 0},
+ {"trcdvcmr1", TRCDVCMR1, 0},
+ {"trcdvcmr2", TRCDVCMR2, 0},
+ {"trcdvcmr3", TRCDVCMR3, 0},
+ {"trcdvcmr4", TRCDVCMR4, 0},
+ {"trcdvcmr5", TRCDVCMR5, 0},
+ {"trcdvcmr6", TRCDVCMR6, 0},
+ {"trcdvcmr7", TRCDVCMR7, 0},
+ {"trccidcvr0", TRCCIDCVR0, 0},
+ {"trccidcvr1", TRCCIDCVR1, 0},
+ {"trccidcvr2", TRCCIDCVR2, 0},
+ {"trccidcvr3", TRCCIDCVR3, 0},
+ {"trccidcvr4", TRCCIDCVR4, 0},
+ {"trccidcvr5", TRCCIDCVR5, 0},
+ {"trccidcvr6", TRCCIDCVR6, 0},
+ {"trccidcvr7", TRCCIDCVR7, 0},
+ {"trcvmidcvr0", TRCVMIDCVR0, 0},
+ {"trcvmidcvr1", TRCVMIDCVR1, 0},
+ {"trcvmidcvr2", TRCVMIDCVR2, 0},
+ {"trcvmidcvr3", TRCVMIDCVR3, 0},
+ {"trcvmidcvr4", TRCVMIDCVR4, 0},
+ {"trcvmidcvr5", TRCVMIDCVR5, 0},
+ {"trcvmidcvr6", TRCVMIDCVR6, 0},
+ {"trcvmidcvr7", TRCVMIDCVR7, 0},
+ {"trccidcctlr0", TRCCIDCCTLR0, 0},
+ {"trccidcctlr1", TRCCIDCCTLR1, 0},
+ {"trcvmidcctlr0", TRCVMIDCCTLR0, 0},
+ {"trcvmidcctlr1", TRCVMIDCCTLR1, 0},
+ {"trcitctrl", TRCITCTRL, 0},
+ {"trcclaimset", TRCCLAIMSET, 0},
+ {"trcclaimclr", TRCCLAIMCLR, 0},
// GICv3 registers
- {"icc_bpr1_el1", ICC_BPR1_EL1},
- {"icc_bpr0_el1", ICC_BPR0_EL1},
- {"icc_pmr_el1", ICC_PMR_EL1},
- {"icc_ctlr_el1", ICC_CTLR_EL1},
- {"icc_ctlr_el3", ICC_CTLR_EL3},
- {"icc_sre_el1", ICC_SRE_EL1},
- {"icc_sre_el2", ICC_SRE_EL2},
- {"icc_sre_el3", ICC_SRE_EL3},
- {"icc_igrpen0_el1", ICC_IGRPEN0_EL1},
- {"icc_igrpen1_el1", ICC_IGRPEN1_EL1},
- {"icc_igrpen1_el3", ICC_IGRPEN1_EL3},
- {"icc_seien_el1", ICC_SEIEN_EL1},
- {"icc_ap0r0_el1", ICC_AP0R0_EL1},
- {"icc_ap0r1_el1", ICC_AP0R1_EL1},
- {"icc_ap0r2_el1", ICC_AP0R2_EL1},
- {"icc_ap0r3_el1", ICC_AP0R3_EL1},
- {"icc_ap1r0_el1", ICC_AP1R0_EL1},
- {"icc_ap1r1_el1", ICC_AP1R1_EL1},
- {"icc_ap1r2_el1", ICC_AP1R2_EL1},
- {"icc_ap1r3_el1", ICC_AP1R3_EL1},
- {"ich_ap0r0_el2", ICH_AP0R0_EL2},
- {"ich_ap0r1_el2", ICH_AP0R1_EL2},
- {"ich_ap0r2_el2", ICH_AP0R2_EL2},
- {"ich_ap0r3_el2", ICH_AP0R3_EL2},
- {"ich_ap1r0_el2", ICH_AP1R0_EL2},
- {"ich_ap1r1_el2", ICH_AP1R1_EL2},
- {"ich_ap1r2_el2", ICH_AP1R2_EL2},
- {"ich_ap1r3_el2", ICH_AP1R3_EL2},
- {"ich_hcr_el2", ICH_HCR_EL2},
- {"ich_misr_el2", ICH_MISR_EL2},
- {"ich_vmcr_el2", ICH_VMCR_EL2},
- {"ich_vseir_el2", ICH_VSEIR_EL2},
- {"ich_lr0_el2", ICH_LR0_EL2},
- {"ich_lr1_el2", ICH_LR1_EL2},
- {"ich_lr2_el2", ICH_LR2_EL2},
- {"ich_lr3_el2", ICH_LR3_EL2},
- {"ich_lr4_el2", ICH_LR4_EL2},
- {"ich_lr5_el2", ICH_LR5_EL2},
- {"ich_lr6_el2", ICH_LR6_EL2},
- {"ich_lr7_el2", ICH_LR7_EL2},
- {"ich_lr8_el2", ICH_LR8_EL2},
- {"ich_lr9_el2", ICH_LR9_EL2},
- {"ich_lr10_el2", ICH_LR10_EL2},
- {"ich_lr11_el2", ICH_LR11_EL2},
- {"ich_lr12_el2", ICH_LR12_EL2},
- {"ich_lr13_el2", ICH_LR13_EL2},
- {"ich_lr14_el2", ICH_LR14_EL2},
- {"ich_lr15_el2", ICH_LR15_EL2}
-};
-
-const AArch64NamedImmMapper::Mapping
-AArch64SysReg::SysRegMapper::CycloneSysRegMappings[] = {
- {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3}
+ {"icc_bpr1_el1", ICC_BPR1_EL1, 0},
+ {"icc_bpr0_el1", ICC_BPR0_EL1, 0},
+ {"icc_pmr_el1", ICC_PMR_EL1, 0},
+ {"icc_ctlr_el1", ICC_CTLR_EL1, 0},
+ {"icc_ctlr_el3", ICC_CTLR_EL3, 0},
+ {"icc_sre_el1", ICC_SRE_EL1, 0},
+ {"icc_sre_el2", ICC_SRE_EL2, 0},
+ {"icc_sre_el3", ICC_SRE_EL3, 0},
+ {"icc_igrpen0_el1", ICC_IGRPEN0_EL1, 0},
+ {"icc_igrpen1_el1", ICC_IGRPEN1_EL1, 0},
+ {"icc_igrpen1_el3", ICC_IGRPEN1_EL3, 0},
+ {"icc_seien_el1", ICC_SEIEN_EL1, 0},
+ {"icc_ap0r0_el1", ICC_AP0R0_EL1, 0},
+ {"icc_ap0r1_el1", ICC_AP0R1_EL1, 0},
+ {"icc_ap0r2_el1", ICC_AP0R2_EL1, 0},
+ {"icc_ap0r3_el1", ICC_AP0R3_EL1, 0},
+ {"icc_ap1r0_el1", ICC_AP1R0_EL1, 0},
+ {"icc_ap1r1_el1", ICC_AP1R1_EL1, 0},
+ {"icc_ap1r2_el1", ICC_AP1R2_EL1, 0},
+ {"icc_ap1r3_el1", ICC_AP1R3_EL1, 0},
+ {"ich_ap0r0_el2", ICH_AP0R0_EL2, 0},
+ {"ich_ap0r1_el2", ICH_AP0R1_EL2, 0},
+ {"ich_ap0r2_el2", ICH_AP0R2_EL2, 0},
+ {"ich_ap0r3_el2", ICH_AP0R3_EL2, 0},
+ {"ich_ap1r0_el2", ICH_AP1R0_EL2, 0},
+ {"ich_ap1r1_el2", ICH_AP1R1_EL2, 0},
+ {"ich_ap1r2_el2", ICH_AP1R2_EL2, 0},
+ {"ich_ap1r3_el2", ICH_AP1R3_EL2, 0},
+ {"ich_hcr_el2", ICH_HCR_EL2, 0},
+ {"ich_misr_el2", ICH_MISR_EL2, 0},
+ {"ich_vmcr_el2", ICH_VMCR_EL2, 0},
+ {"ich_vseir_el2", ICH_VSEIR_EL2, 0},
+ {"ich_lr0_el2", ICH_LR0_EL2, 0},
+ {"ich_lr1_el2", ICH_LR1_EL2, 0},
+ {"ich_lr2_el2", ICH_LR2_EL2, 0},
+ {"ich_lr3_el2", ICH_LR3_EL2, 0},
+ {"ich_lr4_el2", ICH_LR4_EL2, 0},
+ {"ich_lr5_el2", ICH_LR5_EL2, 0},
+ {"ich_lr6_el2", ICH_LR6_EL2, 0},
+ {"ich_lr7_el2", ICH_LR7_EL2, 0},
+ {"ich_lr8_el2", ICH_LR8_EL2, 0},
+ {"ich_lr9_el2", ICH_LR9_EL2, 0},
+ {"ich_lr10_el2", ICH_LR10_EL2, 0},
+ {"ich_lr11_el2", ICH_LR11_EL2, 0},
+ {"ich_lr12_el2", ICH_LR12_EL2, 0},
+ {"ich_lr13_el2", ICH_LR13_EL2, 0},
+ {"ich_lr14_el2", ICH_LR14_EL2, 0},
+ {"ich_lr15_el2", ICH_LR15_EL2, 0},
+
+ // Cyclone registers
+ {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3, AArch64::ProcCyclone},
+
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ {"pan", PAN, AArch64::HasV8_1aOps},
+
+ // v8.1a "Limited Ordering Regions" extension-specific system registers
+ {"lorsa_el1", LORSA_EL1, AArch64::HasV8_1aOps},
+ {"lorea_el1", LOREA_EL1, AArch64::HasV8_1aOps},
+ {"lorn_el1", LORN_EL1, AArch64::HasV8_1aOps},
+ {"lorc_el1", LORC_EL1, AArch64::HasV8_1aOps},
+ {"lorid_el1", LORID_EL1, AArch64::HasV8_1aOps},
+
+ // v8.1a "Virtualization host extensions" system registers
+ {"ttbr1_el2", TTBR1_EL2, AArch64::HasV8_1aOps},
+ {"contextidr_el2", CONTEXTIDR_EL2, AArch64::HasV8_1aOps},
+ {"cnthv_tval_el2", CNTHV_TVAL_EL2, AArch64::HasV8_1aOps},
+ {"cnthv_cval_el2", CNTHV_CVAL_EL2, AArch64::HasV8_1aOps},
+ {"cnthv_ctl_el2", CNTHV_CTL_EL2, AArch64::HasV8_1aOps},
+ {"sctlr_el12", SCTLR_EL12, AArch64::HasV8_1aOps},
+ {"cpacr_el12", CPACR_EL12, AArch64::HasV8_1aOps},
+ {"ttbr0_el12", TTBR0_EL12, AArch64::HasV8_1aOps},
+ {"ttbr1_el12", TTBR1_EL12, AArch64::HasV8_1aOps},
+ {"tcr_el12", TCR_EL12, AArch64::HasV8_1aOps},
+ {"afsr0_el12", AFSR0_EL12, AArch64::HasV8_1aOps},
+ {"afsr1_el12", AFSR1_EL12, AArch64::HasV8_1aOps},
+ {"esr_el12", ESR_EL12, AArch64::HasV8_1aOps},
+ {"far_el12", FAR_EL12, AArch64::HasV8_1aOps},
+ {"mair_el12", MAIR_EL12, AArch64::HasV8_1aOps},
+ {"amair_el12", AMAIR_EL12, AArch64::HasV8_1aOps},
+ {"vbar_el12", VBAR_EL12, AArch64::HasV8_1aOps},
+ {"contextidr_el12", CONTEXTIDR_EL12, AArch64::HasV8_1aOps},
+ {"cntkctl_el12", CNTKCTL_EL12, AArch64::HasV8_1aOps},
+ {"cntp_tval_el02", CNTP_TVAL_EL02, AArch64::HasV8_1aOps},
+ {"cntp_ctl_el02", CNTP_CTL_EL02, AArch64::HasV8_1aOps},
+ {"cntp_cval_el02", CNTP_CVAL_EL02, AArch64::HasV8_1aOps},
+ {"cntv_tval_el02", CNTV_TVAL_EL02, AArch64::HasV8_1aOps},
+ {"cntv_ctl_el02", CNTV_CTL_EL02, AArch64::HasV8_1aOps},
+ {"cntv_cval_el02", CNTV_CVAL_EL02, AArch64::HasV8_1aOps},
+ {"spsr_el12", SPSR_EL12, AArch64::HasV8_1aOps},
+ {"elr_el12", ELR_EL12, AArch64::HasV8_1aOps},
};
uint32_t
-AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
+AArch64SysReg::SysRegMapper::fromString(StringRef Name, uint64_t FeatureBits,
+ bool &Valid) const {
std::string NameLower = Name.lower();
// First search the registers shared by all
for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
- if (SysRegMappings[i].Name == NameLower) {
+ if (SysRegMappings[i].isNameEqual(NameLower, FeatureBits)) {
Valid = true;
return SysRegMappings[i].Value;
}
}
- // Next search for target specific registers
- if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) {
- if (CycloneSysRegMappings[i].Name == NameLower) {
- Valid = true;
- return CycloneSysRegMappings[i].Value;
- }
- }
- }
-
// Now try the instruction-specific registers (either read-only or
// write-only).
for (unsigned i = 0; i < NumInstMappings; ++i) {
- if (InstMappings[i].Name == NameLower) {
+ if (InstMappings[i].isNameEqual(NameLower, FeatureBits)) {
Valid = true;
return InstMappings[i].Value;
}
@@ -814,27 +848,18 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
}
std::string
-AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
+AArch64SysReg::SysRegMapper::toString(uint32_t Bits, uint64_t FeatureBits) const {
// First search the registers shared by all
for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
- if (SysRegMappings[i].Value == Bits) {
+ if (SysRegMappings[i].isValueEqual(Bits, FeatureBits)) {
return SysRegMappings[i].Name;
}
}
- // Next search for target specific registers
- if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) {
- if (CycloneSysRegMappings[i].Value == Bits) {
- return CycloneSysRegMappings[i].Name;
- }
- }
- }
-
// Now try the instruction-specific registers (either read-only or
// write-only).
for (unsigned i = 0; i < NumInstMappings; ++i) {
- if (InstMappings[i].Value == Bits) {
+ if (InstMappings[i].isValueEqual(Bits, FeatureBits)) {
return InstMappings[i].Name;
}
}
@@ -851,38 +876,38 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
}
const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = {
- {"ipas2e1is", IPAS2E1IS},
- {"ipas2le1is", IPAS2LE1IS},
- {"vmalle1is", VMALLE1IS},
- {"alle2is", ALLE2IS},
- {"alle3is", ALLE3IS},
- {"vae1is", VAE1IS},
- {"vae2is", VAE2IS},
- {"vae3is", VAE3IS},
- {"aside1is", ASIDE1IS},
- {"vaae1is", VAAE1IS},
- {"alle1is", ALLE1IS},
- {"vale1is", VALE1IS},
- {"vale2is", VALE2IS},
- {"vale3is", VALE3IS},
- {"vmalls12e1is", VMALLS12E1IS},
- {"vaale1is", VAALE1IS},
- {"ipas2e1", IPAS2E1},
- {"ipas2le1", IPAS2LE1},
- {"vmalle1", VMALLE1},
- {"alle2", ALLE2},
- {"alle3", ALLE3},
- {"vae1", VAE1},
- {"vae2", VAE2},
- {"vae3", VAE3},
- {"aside1", ASIDE1},
- {"vaae1", VAAE1},
- {"alle1", ALLE1},
- {"vale1", VALE1},
- {"vale2", VALE2},
- {"vale3", VALE3},
- {"vmalls12e1", VMALLS12E1},
- {"vaale1", VAALE1}
+ {"ipas2e1is", IPAS2E1IS, 0},
+ {"ipas2le1is", IPAS2LE1IS, 0},
+ {"vmalle1is", VMALLE1IS, 0},
+ {"alle2is", ALLE2IS, 0},
+ {"alle3is", ALLE3IS, 0},
+ {"vae1is", VAE1IS, 0},
+ {"vae2is", VAE2IS, 0},
+ {"vae3is", VAE3IS, 0},
+ {"aside1is", ASIDE1IS, 0},
+ {"vaae1is", VAAE1IS, 0},
+ {"alle1is", ALLE1IS, 0},
+ {"vale1is", VALE1IS, 0},
+ {"vale2is", VALE2IS, 0},
+ {"vale3is", VALE3IS, 0},
+ {"vmalls12e1is", VMALLS12E1IS, 0},
+ {"vaale1is", VAALE1IS, 0},
+ {"ipas2e1", IPAS2E1, 0},
+ {"ipas2le1", IPAS2LE1, 0},
+ {"vmalle1", VMALLE1, 0},
+ {"alle2", ALLE2, 0},
+ {"alle3", ALLE3, 0},
+ {"vae1", VAE1, 0},
+ {"vae2", VAE2, 0},
+ {"vae3", VAE3, 0},
+ {"aside1", ASIDE1, 0},
+ {"vaae1", VAAE1, 0},
+ {"alle1", ALLE1, 0},
+ {"vale1", VALE1, 0},
+ {"vale2", VALE2, 0},
+ {"vale3", VALE3, 0},
+ {"vmalls12e1", VMALLS12E1, 0},
+ {"vaale1", VAALE1, 0}
};
AArch64TLBI::TLBIMapper::TLBIMapper()
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 2ae6f52..659ea90 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -280,14 +280,26 @@ struct AArch64NamedImmMapper {
struct Mapping {
const char *Name;
uint32_t Value;
+ uint64_t AvailableForFeatures;
+ // empty AvailableForFeatures means "always-on"
+ bool isNameEqual(std::string Other, uint64_t FeatureBits=~0ULL) const {
+ if (AvailableForFeatures && !(AvailableForFeatures & FeatureBits))
+ return false;
+ return Name == Other;
+ }
+ bool isValueEqual(uint32_t Other, uint64_t FeatureBits=~0ULL) const {
+ if (AvailableForFeatures && !(AvailableForFeatures & FeatureBits))
+ return false;
+ return Value == Other;
+ }
};
template<int N>
AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm)
: Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {}
- StringRef toString(uint32_t Value, bool &Valid) const;
- uint32_t fromString(StringRef Name, bool &Valid) const;
+ StringRef toString(uint32_t Value, uint64_t FeatureBits, bool &Valid) const;
+ uint32_t fromString(StringRef Name, uint64_t FeatureBits, bool &Valid) const;
/// Many of the instructions allow an alternative assembly form consisting of
/// a simple immediate. Currently the only valid forms are ranges [0, N) where
@@ -435,7 +447,10 @@ namespace AArch64PState {
Invalid = -1,
SPSel = 0x05,
DAIFSet = 0x1e,
- DAIFClr = 0x1f
+ DAIFClr = 0x1f,
+
+ // v8.1a "Privileged Access Never" extension-specific PStates
+ PAN = 0x04,
};
struct PStateMapper : AArch64NamedImmMapper {
@@ -1122,11 +1137,48 @@ namespace AArch64SysReg {
ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101
ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110
ICH_LR15_EL2 = 0xe66f, // 11 100 1100 1101 111
- };
- // Cyclone specific system registers
- enum CycloneSysRegValues {
- CPM_IOACC_CTL_EL3 = 0xff90
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ PAN = 0xc213, // 11 000 0100 0010 011
+
+ // v8.1a "Limited Ordering Regions" extension-specific system registers
+ LORSA_EL1 = 0xc520, // 11 000 1010 0100 000
+ LOREA_EL1 = 0xc521, // 11 000 1010 0100 001
+ LORN_EL1 = 0xc522, // 11 000 1010 0100 010
+ LORC_EL1 = 0xc523, // 11 000 1010 0100 011
+ LORID_EL1 = 0xc527, // 11 000 1010 0100 111
+
+ // v8.1a "Virtualization host extensions" system registers
+ TTBR1_EL2 = 0xe101, // 11 100 0010 0000 001
+ CONTEXTIDR_EL2 = 0xe681, // 11 100 1101 0000 001
+ CNTHV_TVAL_EL2 = 0xe718, // 11 100 1110 0011 000
+ CNTHV_CVAL_EL2 = 0xe71a, // 11 100 1110 0011 010
+ CNTHV_CTL_EL2 = 0xe719, // 11 100 1110 0011 001
+ SCTLR_EL12 = 0xe880, // 11 101 0001 0000 000
+ CPACR_EL12 = 0xe882, // 11 101 0001 0000 010
+ TTBR0_EL12 = 0xe900, // 11 101 0010 0000 000
+ TTBR1_EL12 = 0xe901, // 11 101 0010 0000 001
+ TCR_EL12 = 0xe902, // 11 101 0010 0000 010
+ AFSR0_EL12 = 0xea88, // 11 101 0101 0001 000
+ AFSR1_EL12 = 0xea89, // 11 101 0101 0001 001
+ ESR_EL12 = 0xea90, // 11 101 0101 0010 000
+ FAR_EL12 = 0xeb00, // 11 101 0110 0000 000
+ MAIR_EL12 = 0xed10, // 11 101 1010 0010 000
+ AMAIR_EL12 = 0xed18, // 11 101 1010 0011 000
+ VBAR_EL12 = 0xee00, // 11 101 1100 0000 000
+ CONTEXTIDR_EL12 = 0xee81, // 11 101 1101 0000 001
+ CNTKCTL_EL12 = 0xef08, // 11 101 1110 0001 000
+ CNTP_TVAL_EL02 = 0xef10, // 11 101 1110 0010 000
+ CNTP_CTL_EL02 = 0xef11, // 11 101 1110 0010 001
+ CNTP_CVAL_EL02 = 0xef12, // 11 101 1110 0010 010
+ CNTV_TVAL_EL02 = 0xef18, // 11 101 1110 0011 000
+ CNTV_CTL_EL02 = 0xef19, // 11 101 1110 0011 001
+ CNTV_CVAL_EL02 = 0xef1a, // 11 101 1110 0011 010
+ SPSR_EL12 = 0xea00, // 11 101 0100 0000 000
+ ELR_EL12 = 0xea01, // 11 101 0100 0000 001
+
+ // Cyclone specific system registers
+ CPM_IOACC_CTL_EL3 = 0xff90,
};
// Note that these do not inherit from AArch64NamedImmMapper. This class is
@@ -1135,25 +1187,23 @@ namespace AArch64SysReg {
// this one case.
struct SysRegMapper {
static const AArch64NamedImmMapper::Mapping SysRegMappings[];
- static const AArch64NamedImmMapper::Mapping CycloneSysRegMappings[];
const AArch64NamedImmMapper::Mapping *InstMappings;
size_t NumInstMappings;
- uint64_t FeatureBits;
- SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { }
- uint32_t fromString(StringRef Name, bool &Valid) const;
- std::string toString(uint32_t Bits) const;
+ SysRegMapper() { }
+ uint32_t fromString(StringRef Name, uint64_t FeatureBits, bool &Valid) const;
+ std::string toString(uint32_t Bits, uint64_t FeatureBits) const;
};
struct MSRMapper : SysRegMapper {
static const AArch64NamedImmMapper::Mapping MSRMappings[];
- MSRMapper(uint64_t FeatureBits);
+ MSRMapper();
};
struct MRSMapper : SysRegMapper {
static const AArch64NamedImmMapper::Mapping MRSMappings[];
- MRSMapper(uint64_t FeatureBits);
+ MRSMapper();
};
uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
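
The hunks above fold the separate Cyclone-only table into the single SysRegMappings table: every entry now carries an AvailableForFeatures mask, and the mappers receive the subtarget's FeatureBits per call instead of storing them. Below is a minimal standalone C++ sketch of that gating behaviour; it is not part of the patch, the feature-mask constants are simplified stand-ins, and only the pan and cpm_ioacc_ctl_el3 encodings are taken from the hunks above (the spsel value here is just a placeholder).

#include <cstdint>
#include <iostream>
#include <string>

// Simplified stand-in for AArch64NamedImmMapper::Mapping after this patch.
struct Mapping {
  const char *Name;
  uint32_t Value;
  uint64_t AvailableForFeatures; // 0 (empty) means "always available"

  bool isNameEqual(const std::string &Other, uint64_t FeatureBits) const {
    if (AvailableForFeatures && !(AvailableForFeatures & FeatureBits))
      return false; // present in the table, but gated off for this subtarget
    return Name == Other;
  }
};

// Stand-in feature masks; the real values live in the AArch64 subtarget.
constexpr uint64_t ProcCyclone = 1ull << 0;
constexpr uint64_t HasV8_1aOps = 1ull << 1;

int main() {
  const Mapping Mappings[] = {
      {"spsel", 0x05, 0},                         // always-on (placeholder value)
      {"cpm_ioacc_ctl_el3", 0xff90, ProcCyclone}, // Cyclone-only
      {"pan", 0xc213, HasV8_1aOps},               // v8.1a-only
  };

  const uint64_t FeatureBits = HasV8_1aOps; // pretend: a generic v8.1a core
  for (const Mapping &M : Mappings)
    std::cout << M.Name
              << (M.isNameEqual(M.Name, FeatureBits) ? " accepted" : " rejected")
              << '\n';
}

With a v8.1a-only feature mask the Cyclone entry is rejected while the always-on and v8.1a entries match, which is what lets fromString/toString share one table.
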
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index ce0aed9..bd1c7af 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -180,7 +180,7 @@ def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
[HasV7Ops, FeatureVirtualization,
FeatureMP]>;
-def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true",
+def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
[HasV8Ops, FeatureAClass, FeatureCRC]>;
@@ -260,6 +260,14 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeatureTrustZone, FeatureT2XtPk,
FeatureCrypto, FeatureCRC]>;
+def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
+ "Cortex-R4 ARM processors",
+ [FeatureHWDiv,
+ FeatureAvoidPartialCPSR,
+ FeatureDSPThumb2, FeatureT2XtPk,
+ HasV7Ops, FeatureDB, FeatureHasRAS,
+ FeatureRClass]>;
+
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors",
[FeatureSlowFPBrcc,
@@ -396,6 +404,16 @@ def : ProcessorModel<"krait", CortexA9Model,
FeatureDSPThumb2, FeatureHasRAS,
FeatureAClass]>;
+// FIXME: R4 has currently the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r4", CortexA8Model,
+ [ProcR4]>;
+
+// FIXME: R4F has currently the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r4f", CortexA8Model,
+ [ProcR4,
+ FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVFP3, FeatureVFPOnlySP, FeatureD16]>;
+
// FIXME: R5 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model,
[ProcR5, HasV7Ops, FeatureDB,
@@ -457,14 +475,6 @@ def : ProcessorModel<"cyclone", SwiftModel,
FeatureDB,FeatureDSPThumb2,
FeatureHasRAS, FeatureZCZeroing]>;
-// V8.1 Processors
-def : ProcNoItin<"generic-armv8.1-a", [HasV8Ops, FeatureV8_1a,
- FeatureDB, FeatureFPARMv8,
- FeatureNEON, FeatureDSPThumb2,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureTrustZone, FeatureT2XtPk,
- FeatureCrypto]>;
-
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -485,7 +495,15 @@ def ARMInstrInfo : InstrInfo;
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
+def ARMAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
def ARM : Target {
// Pull in Instruction Info:
let InstructionSet = ARMInstrInfo;
+ let AssemblyWriters = [ARMAsmWriter];
}
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 102def1..1a2acf5 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -207,7 +207,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
SmallString<60> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
<< getFunctionNumber() << '_' << uid << '_' << uid2;
- return OutContext.GetOrCreateSymbol(Name.str());
+ return OutContext.GetOrCreateSymbol(Name);
}
@@ -216,7 +216,7 @@ MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
SmallString<60> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH"
<< getFunctionNumber();
- return OutContext.GetOrCreateSymbol(Name.str());
+ return OutContext.GetOrCreateSymbol(Name);
}
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
@@ -520,28 +520,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
-
- // Emit a .data.rel section containing any stubs that were created.
- if (TT.isOSBinFormatELF()) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
-
- for (auto &stub: Stubs) {
- OutStreamer.EmitLabel(stub.first);
- OutStreamer.EmitSymbolValue(stub.second.getPointer(),
- TD->getPointerSize(0));
- }
- Stubs.clear();
- }
- }
}
//===----------------------------------------------------------------------===//
@@ -597,7 +575,7 @@ void ARMAsmPrinter::emitAttributes() {
std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS.str();
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
else
ArchFS = FS;
}
@@ -661,8 +639,8 @@ void ARMAsmPrinter::emitAttributes() {
// Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
if (STI.hasV8Ops())
ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- STI.hasV8_1a() ? ARMBuildAttrs::AllowNeonARMv8_1a:
- ARMBuildAttrs::AllowNeonARMv8);
+ STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a:
+ ARMBuildAttrs::AllowNeonARMv8);
} else {
if (STI.hasFPARMv8())
// FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index a8c7657..3f79a9b 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -245,11 +245,15 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
// This register should preferably be even (Odd == 0) or odd (Odd == 1).
// Check if the other part of the pair has already been assigned, and provide
// the paired register as the first hint.
+ unsigned Paired = Hint.second;
+ if (Paired == 0)
+ return;
+
unsigned PairedPhys = 0;
- if (VRM && VRM->hasPhys(Hint.second)) {
- PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this);
- if (PairedPhys && MRI.isReserved(PairedPhys))
- PairedPhys = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(Paired)) {
+ PairedPhys = Paired;
+ } else if (VRM && VRM->hasPhys(Paired)) {
+ PairedPhys = getPairedGPR(VRM->getPhys(Paired), Odd, this);
}
// First prefer the paired physreg.
@@ -284,9 +288,14 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
// change.
unsigned OtherReg = Hint.second;
Hint = MRI->getRegAllocationHint(OtherReg);
- if (Hint.second == Reg)
- // Make sure the pair has not already divorced.
+ // Make sure the pair has not already divorced.
+ if (Hint.second == Reg) {
MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+ if (TargetRegisterInfo::isVirtualRegister(NewReg))
+ MRI->setRegAllocationHint(NewReg,
+ Hint.first == (unsigned)ARMRI::RegPairOdd ? ARMRI::RegPairEven
+ : ARMRI::RegPairOdd, OtherReg);
+ }
}
}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 13bef54..36f63e2 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -86,7 +86,7 @@ protected:
}
public:
- virtual ~ARMConstantPoolValue();
+ ~ARMConstantPoolValue() override;
ARMCP::ARMCPModifier getModifier() const { return Modifier; }
const char *getModifierText() const;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 830953b..9d2b09b 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -311,6 +311,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
return;
StackAdjustingInsts DefCFAOffsetCandidates;
+ bool HasFP = hasFP(MF);
// Allocate the vararg register save area.
if (ArgRegsSaveSize) {
@@ -327,6 +328,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
DefCFAOffsetCandidates.addInst(std::prev(MBBI),
NumBytes - ArgRegsSaveSize, true);
}
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
return;
}
@@ -375,7 +377,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
}
// Determine starting offsets of spill areas.
- bool HasFP = hasFP(MF);
unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 3b1b8dd..72afd2c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3504,25 +3504,34 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- // Try to generate VSEL on ARMv8.
+ // Try to generate VMAXNM/VMINNM on ARMv8.
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
- // We can select VMAXNM/VMINNM from a compare followed by a select with the
+ // We can use VMAXNM/VMINNM for a compare followed by a select with the
// same operands, as follows:
- // c = fcmp [ogt, olt, ugt, ult] a, b
+ // c = fcmp [?gt, ?ge, ?lt, ?le] a, b
// select c, a, b
- // We only do this in unsafe-fp-math, because signed zeros and NaNs are
- // handled differently than the original code sequence.
+ // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
+ // We only do this transformation in UnsafeFPMath and for no-NaNs
+ // comparisons, because signed zeros and NaNs are handled differently than
+ // the original code sequence.
+ // FIXME: There are more cases that can be transformed even with NaNs,
+ // signed zeroes and safe math. E.g. in the following, the result will be
+ // FalseVal if a is a NaN or -0./0. and that's what vmaxnm will give, too.
+ // c = fcmp ogt, a, 0. ; select c, a, 0. => vmaxnm a, 0.
+ // FIXME: There is similar code that allows some extensions in
+ // AArch64TargetLowering::LowerSELECT_CC that should be shared with this
+ // code.
if (getTargetMachine().Options.UnsafeFPMath) {
if (LHS == TrueVal && RHS == FalseVal) {
- if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ if (CC == ISD::SETGT || CC == ISD::SETGE)
return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ if (CC == ISD::SETLT || CC == ISD::SETLE)
return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
} else if (LHS == FalseVal && RHS == TrueVal) {
- if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ if (CC == ISD::SETLT || CC == ISD::SETLE)
return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ if (CC == ISD::SETGT || CC == ISD::SETGE)
return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
}
}
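
The comment block in this hunk notes that the fcmp+select pattern and VMAXNM/VMINNM disagree on NaNs and signed zeros, which is why the rewrite stays behind UnsafeFPMath / no-NaNs comparisons. A small standalone illustration, not taken from the patch, using std::fmax to model the IEEE maxNum behaviour that VMAXNM provides:

#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  double a = 3.0;
  double b = std::numeric_limits<double>::quiet_NaN();

  // Original pattern: c = fcmp ogt a, b ; select c, a, b
  double sel = (a > b) ? a : b;    // (3 > NaN) is false -> picks b, i.e. NaN

  // maxNum semantics (what VMAXNM implements): a NaN operand is dropped.
  double maxnum = std::fmax(a, b); // -> 3.0

  std::printf("select form: %f, maxnum form: %f\n", sel, maxnum);
}

When the second operand is a NaN, the select form propagates the NaN while maxNum returns the other operand, so the two are only interchangeable once NaNs are ruled out.
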
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c3984ca..52f3555 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -208,6 +208,8 @@ def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
AssemblerPredicate<"HasV8Ops", "armv8">;
def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
+def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
+ AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -226,8 +228,6 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
-def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">,
- AssemblerPredicate<"FeatureV8_1a", "v8.1a">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
@@ -388,6 +388,9 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
// Immediate operands with a shared generic asm render method.
class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; }
+// Operands that are part of a memory addressing mode.
+class MemOperand : Operand<i32> { let OperandType = "OPERAND_MEMORY"; }
+
// Branch target.
// FIXME: rename brtarget to t2_brtarget
def brtarget : Operand<OtherVT> {
@@ -790,7 +793,7 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
// addrmode_imm12 := reg +/- imm12
//
def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
-class AddrMode_Imm12 : Operand<i32>,
+class AddrMode_Imm12 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
// 12-bit immediate operand. Note that instructions using this encode
// #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
@@ -813,7 +816,7 @@ def addrmode_imm12_pre : AddrMode_Imm12 {
// ldst_so_reg := reg +/- reg shop imm
//
def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
-def ldst_so_reg : Operand<i32>,
+def ldst_so_reg : MemOperand,
ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
let EncoderMethod = "getLdStSORegOpValue";
// FIXME: Simplify the printer
@@ -829,7 +832,7 @@ def ldst_so_reg : Operand<i32>,
// {8} 1 is imm8 is non-negative. 0 otherwise.
// {7-0} [0,255] imm8 value.
def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; }
-def postidx_imm8 : Operand<i32> {
+def postidx_imm8 : MemOperand {
let PrintMethod = "printPostIdxImm8Operand";
let ParserMatchClass = PostIdxImm8AsmOperand;
let MIOperandInfo = (ops i32imm);
@@ -841,7 +844,7 @@ def postidx_imm8 : Operand<i32> {
// {8} 1 is imm8 is non-negative. 0 otherwise.
// {7-0} [0,255] imm8 value, scaled by 4.
def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; }
-def postidx_imm8s4 : Operand<i32> {
+def postidx_imm8s4 : MemOperand {
let PrintMethod = "printPostIdxImm8s4Operand";
let ParserMatchClass = PostIdxImm8s4AsmOperand;
let MIOperandInfo = (ops i32imm);
@@ -854,7 +857,7 @@ def PostIdxRegAsmOperand : AsmOperandClass {
let Name = "PostIdxReg";
let ParserMethod = "parsePostIdxReg";
}
-def postidx_reg : Operand<i32> {
+def postidx_reg : MemOperand {
let EncoderMethod = "getPostIdxRegOpValue";
let DecoderMethod = "DecodePostIdxReg";
let PrintMethod = "printPostIdxRegOperand";
@@ -869,7 +872,7 @@ def postidx_reg : Operand<i32> {
// FIXME: addrmode2 should be refactored the rest of the way to always
// use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg).
def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; }
-def addrmode2 : Operand<i32>,
+def addrmode2 : MemOperand,
ComplexPattern<i32, 3, "SelectAddrMode2", []> {
let EncoderMethod = "getAddrMode2OpValue";
let PrintMethod = "printAddrMode2Operand";
@@ -881,7 +884,7 @@ def PostIdxRegShiftedAsmOperand : AsmOperandClass {
let Name = "PostIdxRegShifted";
let ParserMethod = "parsePostIdxReg";
}
-def am2offset_reg : Operand<i32>,
+def am2offset_reg : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode2OffsetOpValue";
@@ -894,7 +897,7 @@ def am2offset_reg : Operand<i32>,
// FIXME: am2offset_imm should only need the immediate, not the GPR. Having
// the GPR is purely vestigal at this point.
def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; }
-def am2offset_imm : Operand<i32>,
+def am2offset_imm : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode2OffsetOpValue";
@@ -909,7 +912,7 @@ def am2offset_imm : Operand<i32>,
//
// FIXME: split into imm vs. reg versions.
def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
-class AddrMode3 : Operand<i32>,
+class AddrMode3 : MemOperand,
ComplexPattern<i32, 3, "SelectAddrMode3", []> {
let EncoderMethod = "getAddrMode3OpValue";
let ParserMatchClass = AddrMode3AsmOperand;
@@ -932,7 +935,7 @@ def AM3OffsetAsmOperand : AsmOperandClass {
let Name = "AM3Offset";
let ParserMethod = "parseAM3Offset";
}
-def am3offset : Operand<i32>,
+def am3offset : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode3Offset",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode3OffsetOpValue";
@@ -951,7 +954,7 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
// addrmode5 := reg +/- imm8*4
//
def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
-class AddrMode5 : Operand<i32>,
+class AddrMode5 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode5", []> {
let EncoderMethod = "getAddrMode5OpValue";
let DecoderMethod = "DecodeAddrMode5Operand";
@@ -970,7 +973,7 @@ def addrmode5_pre : AddrMode5 {
// addrmode6 := reg with optional alignment
//
def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
-def addrmode6 : Operand<i32>,
+def addrmode6 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
@@ -979,7 +982,7 @@ def addrmode6 : Operand<i32>,
let ParserMatchClass = AddrMode6AsmOperand;
}
-def am6offset : Operand<i32>,
+def am6offset : MemOperand,
ComplexPattern<i32, 1, "SelectAddrMode6Offset",
[], [SDNPWantRoot]> {
let PrintMethod = "printAddrMode6OffsetOperand";
@@ -990,7 +993,7 @@ def am6offset : Operand<i32>,
// Special version of addrmode6 to handle alignment encoding for VST1/VLD1
// (single element from one lane) for size 32.
-def addrmode6oneL32 : Operand<i32>,
+def addrmode6oneL32 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
@@ -998,7 +1001,7 @@ def addrmode6oneL32 : Operand<i32>,
}
// Base class for addrmode6 with specific alignment restrictions.
-class AddrMode6Align : Operand<i32>,
+class AddrMode6Align : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
@@ -1074,7 +1077,7 @@ def addrmode6align64or128or256 : AddrMode6Align {
// Special version of addrmode6 to handle alignment encoding for VLD-dup
// instructions, specifically VLD4-dup.
-def addrmode6dup : Operand<i32>,
+def addrmode6dup : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
@@ -1085,7 +1088,7 @@ def addrmode6dup : Operand<i32>,
}
// Base class for addrmode6dup with specific alignment restrictions.
-class AddrMode6DupAlign : Operand<i32>,
+class AddrMode6DupAlign : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
@@ -1149,7 +1152,7 @@ def addrmode6dupalign64or128 : AddrMode6DupAlign {
// addrmodepc := pc + reg
//
-def addrmodepc : Operand<i32>,
+def addrmodepc : MemOperand,
ComplexPattern<i32, 2, "SelectAddrModePC", []> {
let PrintMethod = "printAddrModePCOperand";
let MIOperandInfo = (ops GPR, i32imm);
@@ -1158,7 +1161,7 @@ def addrmodepc : Operand<i32>,
// addr_offset_none := reg
//
def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; }
-def addr_offset_none : Operand<i32>,
+def addr_offset_none : MemOperand,
ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> {
let PrintMethod = "printAddrMode7Operand";
let DecoderMethod = "DecodeAddrMode7Operand";
@@ -1417,7 +1420,8 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
let isCompare = 1, Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
+ PatFrag opnode, bit Commutable = 0,
+ string rrDecoderMethod = ""> {
def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii,
opc, "\t$Rn, $imm",
[(opnode GPR:$Rn, mod_imm:$imm)]>,
@@ -1445,6 +1449,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
let Inst{15-12} = 0b0000;
let Inst{11-4} = 0b00000000;
let Inst{3-0} = Rm;
+ let DecoderMethod = rrDecoderMethod;
let Unpredictable{15-12} = 0b1111;
}
@@ -4263,6 +4268,30 @@ def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>;
def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>;
//===----------------------------------------------------------------------===//
+// ARMv8.1a Privilege Access Never extension
+//
+// SETPAN #imm1
+
+def SETPAN : AInoP<(outs), (ins imm0_1:$imm), MiscFrm, NoItinerary, "setpan",
+ "\t$imm", []>, Requires<[IsARM, HasV8, HasV8_1a]> {
+ bits<1> imm;
+
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010001;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-10} = 0b000000;
+ let Inst{9} = imm;
+ let Inst{8} = 0b0;
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = 0b0000;
+
+ let Unpredictable{19-16} = 0b1111;
+ let Unpredictable{15-10} = 0b111111;
+ let Unpredictable{8} = 0b1;
+ let Unpredictable{3-0} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
@@ -4366,7 +4395,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm),
// Note that TST/TEQ don't set all the same flags that CMP does!
defm TST : AI1_cmp_irs<0b1000, "tst",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
- BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1,
+ "DecodeTSTInstruction">;
defm TEQ : AI1_cmp_irs<0b1001, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
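
As a quick cross-check of the SETPAN record added earlier in this file's hunk, the fixed fields put the one-bit immediate at bit 9 on top of a 0xf1100000 base word. The sketch below is not part of the patch; it only re-assembles the encoding from the bit assignments listed in the TableGen definition.

#include <cstdint>
#include <cstdio>

uint32_t encodeSETPAN(uint32_t imm /* 0 or 1 */) {
  uint32_t Inst = 0;
  Inst |= 0xFu << 28;       // Inst{31-28} = 0b1111
  Inst |= 0x11u << 20;      // Inst{27-20} = 0b00010001
  Inst |= (imm & 1u) << 9;  // Inst{9}     = imm
  // Inst{19-16}, {15-10}, {8}, {7-4}, {3-0} are all zero.
  return Inst;
}

int main() {
  std::printf("setpan #0 -> 0x%08x\n", encodeSETPAN(0)); // 0xf1100000
  std::printf("setpan #1 -> 0x%08x\n", encodeSETPAN(1)); // 0xf1100200
}
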
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 3c62e0e..d0ade77 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -142,7 +142,7 @@ def t_blxtarget : Operand<i32> {
// t_addrmode_pc := <label> => pc + imm8 * 4
//
-def t_addrmode_pc : Operand<i32> {
+def t_addrmode_pc : MemOperand {
let EncoderMethod = "getAddrModePCOpValue";
let DecoderMethod = "DecodeThumbAddrModePC";
let PrintMethod = "printThumbLdrLabelOperand";
@@ -153,7 +153,7 @@ def t_addrmode_pc : Operand<i32> {
// t_addrmode_rr := reg + reg
//
def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; }
-def t_addrmode_rr : Operand<i32>,
+def t_addrmode_rr : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let PrintMethod = "printThumbAddrModeRROperand";
@@ -169,7 +169,7 @@ def t_addrmode_rr : Operand<i32>,
// the reg+imm forms will match instead. This is a horrible way to do that,
// as it forces tight coupling between the methods, but it's how selectiondag
// currently works.
-def t_addrmode_rrs1 : Operand<i32>,
+def t_addrmode_rrs1 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let PrintMethod = "printThumbAddrModeRROperand";
@@ -177,7 +177,7 @@ def t_addrmode_rrs1 : Operand<i32>,
let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
}
-def t_addrmode_rrs2 : Operand<i32>,
+def t_addrmode_rrs2 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let DecoderMethod = "DecodeThumbAddrModeRR";
@@ -185,7 +185,7 @@ def t_addrmode_rrs2 : Operand<i32>,
let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
}
-def t_addrmode_rrs4 : Operand<i32>,
+def t_addrmode_rrs4 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let DecoderMethod = "DecodeThumbAddrModeRR";
@@ -197,7 +197,7 @@ def t_addrmode_rrs4 : Operand<i32>,
// t_addrmode_is4 := reg + imm5 * 4
//
def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; }
-def t_addrmode_is4 : Operand<i32>,
+def t_addrmode_is4 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
let EncoderMethod = "getAddrModeISOpValue";
let DecoderMethod = "DecodeThumbAddrModeIS";
@@ -209,7 +209,7 @@ def t_addrmode_is4 : Operand<i32>,
// t_addrmode_is2 := reg + imm5 * 2
//
def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; }
-def t_addrmode_is2 : Operand<i32>,
+def t_addrmode_is2 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
let EncoderMethod = "getAddrModeISOpValue";
let DecoderMethod = "DecodeThumbAddrModeIS";
@@ -221,7 +221,7 @@ def t_addrmode_is2 : Operand<i32>,
// t_addrmode_is1 := reg + imm5
//
def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; }
-def t_addrmode_is1 : Operand<i32>,
+def t_addrmode_is1 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
let EncoderMethod = "getAddrModeISOpValue";
let DecoderMethod = "DecodeThumbAddrModeIS";
@@ -235,7 +235,7 @@ def t_addrmode_is1 : Operand<i32>,
// FIXME: This really shouldn't have an explicit SP operand at all. It should
// be implicit, just like in the instruction encoding itself.
def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; }
-def t_addrmode_sp : Operand<i32>,
+def t_addrmode_sp : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
let EncoderMethod = "getAddrModeThumbSPOpValue";
let DecoderMethod = "DecodeThumbAddrModeSP";
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 10b0a0e..103ee00 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -148,7 +148,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
// t2addrmode_imm12 := reg + imm12
def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
-def t2addrmode_imm12 : Operand<i32>,
+def t2addrmode_imm12 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
let PrintMethod = "printAddrModeImm12Operand<false>";
let EncoderMethod = "getAddrModeImm12OpValue";
@@ -178,7 +178,7 @@ def t2adrlabel : Operand<i32> {
// t2addrmode_posimm8 := reg + imm8
def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";}
-def t2addrmode_posimm8 : Operand<i32> {
+def t2addrmode_posimm8 : MemOperand {
let PrintMethod = "printT2AddrModeImm8Operand<false>";
let EncoderMethod = "getT2AddrModeImm8OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8";
@@ -188,7 +188,7 @@ def t2addrmode_posimm8 : Operand<i32> {
// t2addrmode_negimm8 := reg - imm8
def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";}
-def t2addrmode_negimm8 : Operand<i32>,
+def t2addrmode_negimm8 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
let PrintMethod = "printT2AddrModeImm8Operand<false>";
let EncoderMethod = "getT2AddrModeImm8OpValue";
@@ -199,7 +199,7 @@ def t2addrmode_negimm8 : Operand<i32>,
// t2addrmode_imm8 := reg +/- imm8
def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; }
-class T2AddrMode_Imm8 : Operand<i32>,
+class T2AddrMode_Imm8 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
let EncoderMethod = "getT2AddrModeImm8OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8";
@@ -215,7 +215,7 @@ def t2addrmode_imm8_pre : T2AddrMode_Imm8 {
let PrintMethod = "printT2AddrModeImm8Operand<true>";
}
-def t2am_imm8_offset : Operand<i32>,
+def t2am_imm8_offset : MemOperand,
ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
[], [SDNPWantRoot]> {
let PrintMethod = "printT2AddrModeImm8OffsetOperand";
@@ -225,7 +225,7 @@ def t2am_imm8_offset : Operand<i32>,
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
-class T2AddrMode_Imm8s4 : Operand<i32> {
+class T2AddrMode_Imm8s4 : MemOperand {
let EncoderMethod = "getT2AddrModeImm8s4OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8s4";
let ParserMatchClass = MemImm8s4OffsetAsmOperand;
@@ -241,7 +241,7 @@ def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 {
}
def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; }
-def t2am_imm8s4_offset : Operand<i32> {
+def t2am_imm8s4_offset : MemOperand {
let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
let EncoderMethod = "getT2Imm8s4OpValue";
let DecoderMethod = "DecodeT2Imm8S4";
@@ -251,7 +251,7 @@ def t2am_imm8s4_offset : Operand<i32> {
def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
let Name = "MemImm0_1020s4Offset";
}
-def t2addrmode_imm0_1020s4 : Operand<i32>,
+def t2addrmode_imm0_1020s4 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeExclusive"> {
let PrintMethod = "printT2AddrModeImm0_1020s4Operand";
let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue";
@@ -262,7 +262,7 @@ def t2addrmode_imm0_1020s4 : Operand<i32>,
// t2addrmode_so_reg := reg + (reg << imm2)
def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";}
-def t2addrmode_so_reg : Operand<i32>,
+def t2addrmode_so_reg : MemOperand,
ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
let PrintMethod = "printT2AddrModeSoRegOperand";
let EncoderMethod = "getT2AddrModeSORegOpValue";
@@ -273,13 +273,13 @@ def t2addrmode_so_reg : Operand<i32>,
// Addresses for the TBB/TBH instructions.
def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; }
-def addrmode_tbb : Operand<i32> {
+def addrmode_tbb : MemOperand {
let PrintMethod = "printAddrModeTBB";
let ParserMatchClass = addrmode_tbb_asmoperand;
let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
}
def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; }
-def addrmode_tbh : Operand<i32> {
+def addrmode_tbh : MemOperand {
let PrintMethod = "printAddrModeTBH";
let ParserMatchClass = addrmode_tbh_asmoperand;
let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
@@ -3630,8 +3630,8 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
-def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>,
- Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass, PreV8]> {
+def t2BXJ : T2I<(outs), (ins GPRnopc:$func), NoItinerary, "bxj", "\t$func", []>,
+ Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass]> {
bits<4> func;
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -4281,6 +4281,23 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
//===----------------------------------------------------------------------===//
+// ARMv8.1 Privilege Access Never extension
+//
+// SETPAN #imm1
+
+def t2SETPAN : T1I<(outs), (ins imm0_1:$imm), NoItinerary, "setpan\t$imm", []>,
+               T1Misc<0b0110000>, Requires<[IsThumb2, HasV8, HasV8_1a]> {
+  bits<1> imm;
+
+  let Inst{4} = 0b1;
+  let Inst{3} = imm;
+  let Inst{2-0} = 0b000;
+
+  let Unpredictable{4} = 0b1;
+  let Unpredictable{2-0} = 0b111;
+}
+
+//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index fbec9e6..2a3e1da 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -133,6 +133,7 @@ void ARMSubtarget::initializeEnvironment() {
HasV6T2Ops = false;
HasV7Ops = false;
HasV8Ops = false;
+ HasV8_1aOps = false;
HasVFPv2 = false;
HasVFPv3 = false;
HasVFPv4 = false;
@@ -166,7 +167,6 @@ void ARMSubtarget::initializeEnvironment() {
HasTrustZone = false;
HasCrypto = false;
HasCRC = false;
- HasV8_1a = false;
HasZeroCycleZeroing = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
@@ -191,7 +191,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
ARM_MC::ParseARMTriple(TargetTriple.getTriple(), CPUString);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS.str();
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
else
ArchFS = FS;
}
@@ -254,7 +254,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
switch (IT) {
case DefaultIT:
- RestrictIT = hasV8Ops() ? true : false;
+ RestrictIT = hasV8Ops();
break;
case RestrictedIT:
RestrictIT = true;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index f36cd5c..d82314d 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -43,7 +43,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexA17, CortexR5, Swift, CortexA53, CortexA57, Krait,
+ CortexA17, CortexR4, CortexR4F, CortexR5, Swift, CortexA53, CortexA57, Krait,
};
enum ARMProcClassEnum {
None, AClass, RClass, MClass
@@ -67,6 +67,7 @@ protected:
bool HasV6T2Ops;
bool HasV7Ops;
bool HasV8Ops;
+ bool HasV8_1aOps;
/// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
/// floating point ISAs are supported.
@@ -182,9 +183,6 @@ protected:
/// HasCRC - if true, processor supports CRC instructions
bool HasCRC;
- /// HasV8_1a - if true, the processor has V8.1a: PAN and RDMA extensions
- bool HasV8_1a;
-
/// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing;
@@ -295,6 +293,7 @@ public:
bool hasV6T2Ops() const { return HasV6T2Ops; }
bool hasV7Ops() const { return HasV7Ops; }
bool hasV8Ops() const { return HasV8Ops; }
+ bool hasV8_1aOps() const { return HasV8_1aOps; }
bool isCortexA5() const { return ARMProcFamily == CortexA5; }
bool isCortexA7() const { return ARMProcFamily == CortexA7; }
@@ -316,7 +315,6 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
- bool hasV8_1a() const { return HasV8_1a; }
bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 1bee1b0..ae33340 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -42,6 +42,11 @@ EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden,
cl::desc("Enable ARM load/store optimization pass"),
cl::init(true));
+// FIXME: Unify control over GlobalMerge.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalMerge("arm-global-merge", cl::Hidden,
+                  cl::desc("Enable the global merge pass"));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
@@ -332,7 +337,9 @@ void ARMPassConfig::addIRPasses() {
}
bool ARMPassConfig::addPreISel() {
-  if (TM->getOptLevel() == CodeGenOpt::Aggressive)
+  if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
+       EnableGlobalMerge == cl::BOU_UNSET) ||
+      EnableGlobalMerge == cl::BOU_TRUE)
// FIXME: This is using the thumb1 only constant value for
// maximal global offset for merging globals. We may want
// to look into using the old value for non-thumb1 code of
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 2215efb..b9ad2c8 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -276,8 +276,8 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasD16() const {
return STI.getFeatureBits() & ARM::FeatureD16;
}
- bool hasV8_1a() const {
- return STI.getFeatureBits() & ARM::FeatureV8_1a;
+ bool hasV8_1aOps() const {
+ return STI.getFeatureBits() & ARM::HasV8_1aOps;
}
void SwitchMode() {
@@ -5418,47 +5418,44 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
/// inclusion of carry set or predication code operands.
//
// FIXME: It would be nice to autogen this.
-void ARMAsmParser::
-getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
- bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) {
- if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+ bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode) {
+ CanAcceptCarrySet =
+ Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
- Mnemonic == "add" || Mnemonic == "adc" ||
- Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" ||
- Mnemonic == "orr" || Mnemonic == "mvn" ||
- Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
- Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" ||
- Mnemonic == "vfm" || Mnemonic == "vfnm" ||
- (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" ||
- Mnemonic == "mla" || Mnemonic == "smlal" ||
- Mnemonic == "umlal" || Mnemonic == "umull"))) {
- CanAcceptCarrySet = true;
- } else
- CanAcceptCarrySet = false;
+ Mnemonic == "add" || Mnemonic == "adc" || Mnemonic == "mul" ||
+ Mnemonic == "bic" || Mnemonic == "asr" || Mnemonic == "orr" ||
+ Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" ||
+ Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "eor" ||
+ Mnemonic == "neg" || Mnemonic == "vfm" || Mnemonic == "vfnm" ||
+ (!isThumb() &&
+ (Mnemonic == "smull" || Mnemonic == "mov" || Mnemonic == "mla" ||
+ Mnemonic == "smlal" || Mnemonic == "umlal" || Mnemonic == "umull"));
if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" ||
- Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
+ Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" ||
Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") ||
- Mnemonic.startswith("vsel") ||
- Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
- Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
- Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
- Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" ||
+ Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" ||
+ Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" ||
+ Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" ||
+ Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" ||
+ Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" ||
Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
(FullInst.startswith("vmull") && FullInst.endswith(".p64"))) {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {
// Some instructions are only predicable in Thumb mode
- CanAcceptPredicationCode
- = Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" &&
+ CanAcceptPredicationCode =
+ Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" &&
Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" &&
Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" &&
Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" &&
- Mnemonic != "ldc2" && Mnemonic != "ldc2l" &&
- Mnemonic != "stc2" && Mnemonic != "stc2l" &&
- !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs");
+ Mnemonic != "ldc2" && Mnemonic != "ldc2l" && Mnemonic != "stc2" &&
+ Mnemonic != "stc2l" && !Mnemonic.startswith("rfe") &&
+ !Mnemonic.startswith("srs");
} else if (isThumbOne()) {
if (hasV6MOps())
CanAcceptPredicationCode = Mnemonic != "movs";
@@ -6153,6 +6150,14 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"destination operands can't be identical");
return false;
}
+  case ARM::t2BXJ: {
+    const unsigned RmReg = Inst.getOperand(0).getReg();
+    // Rm = SP is no longer unpredictable in v8-A
+    if (RmReg == ARM::SP && !hasV8Ops())
+      return Error(Operands[2]->getStartLoc(),
+                   "r13 (SP) is an unpredictable operand to BXJ");
+    return false;
+  }
case ARM::STRD: {
// Rt2 must be Rt + 1.
unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4d5122a..4c169a8 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -91,7 +91,7 @@ public:
MCDisassembler(STI, Ctx) {
}
- ~ARMDisassembler() {}
+ ~ARMDisassembler() override {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -106,7 +106,7 @@ public:
MCDisassembler(STI, Ctx) {
}
- ~ThumbDisassembler() {}
+ ~ThumbDisassembler() override {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -212,6 +212,10 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
+                               uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
+                               uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
@@ -2119,6 +2123,54 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
+                               uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  unsigned Pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+
+  if (Pred == 0xF)
+    return DecodeSETPANInstruction(Inst, Insn, Address, Decoder);
+
+  if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodePredicateOperand(Inst, Pred, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  return S;
+}
+
+static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
+                               uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  unsigned Imm = fieldFromInstruction(Insn, 9, 1);
+
+  const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+  uint64_t FeatureBits = Dis->getSubtargetInfo().getFeatureBits();
+  if ((FeatureBits & ARM::HasV8_1aOps) == 0 ||
+      (FeatureBits & ARM::HasV8Ops) == 0 )
+    return MCDisassembler::Fail;
+
+  // Decoder can be called from DecodeTST, which does not check the full
+  // encoding is valid.
+  if (fieldFromInstruction(Insn, 20,12) != 0xf11 ||
+      fieldFromInstruction(Insn, 4,4) != 0)
+    return MCDisassembler::Fail;
+  if (fieldFromInstruction(Insn, 10,10) != 0 ||
+      fieldFromInstruction(Insn, 0,4) != 0)
+    S = MCDisassembler::SoftFail;
+
+  Inst.setOpcode(ARM::SETPAN);
+  Inst.addOperand(MCOperand::CreateImm(Imm));
+
+  return S;
+}
+
static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index e15323d..c2e1b2a 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -40,12 +40,12 @@ static unsigned translateShiftImm(unsigned imm) {
/// Prints the shift value with an immediate value.
static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
- unsigned ShImm, bool UseMarkup) {
+ unsigned ShImm, bool UseMarkup) {
if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm))
return;
O << ", ";
- assert (!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
+ assert(!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
O << getShiftOpcStr(ShOpc);
if (ShOpc != ARM_AM::rrx) {
@@ -58,49 +58,52 @@ static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
}
}
-ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) :
- MCInstPrinter(MAI, MII, MRI) {
- // Initialize the set of available features.
- setAvailableFeatures(STI.getFeatureBits());
-}
+ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << markup("<reg:")
- << getRegisterName(RegNo)
- << markup(">");
+ OS << markup("<reg:") << getRegisterName(RegNo) << markup(">");
}
void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
unsigned Opcode = MI->getOpcode();
- switch(Opcode) {
+ switch (Opcode) {
// Check for HINT instructions w/ canonical names.
case ARM::HINT:
case ARM::tHINT:
case ARM::t2HINT:
switch (MI->getOperand(0).getImm()) {
- case 0: O << "\tnop"; break;
- case 1: O << "\tyield"; break;
- case 2: O << "\twfe"; break;
- case 3: O << "\twfi"; break;
- case 4: O << "\tsev"; break;
+ case 0:
+ O << "\tnop";
+ break;
+ case 1:
+ O << "\tyield";
+ break;
+ case 2:
+ O << "\twfe";
+ break;
+ case 3:
+ O << "\twfi";
+ break;
+ case 4:
+ O << "\tsev";
+ break;
case 5:
- if ((getAvailableFeatures() & ARM::HasV8Ops)) {
+ if ((STI.getFeatureBits() & ARM::HasV8Ops)) {
O << "\tsevl";
break;
} // Fallthrough for non-v8
default:
// Anything else should just print normally.
- printInstruction(MI, O);
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
return;
}
- printPredicateOperand(MI, 1, O);
+ printPredicateOperand(MI, 1, STI, O);
if (Opcode == ARM::t2HINT)
O << ".w";
printAnnotation(O, Annot);
@@ -115,8 +118,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
const MCOperand &MO3 = MI->getOperand(3);
O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
- printSBitModifierOperand(MI, 6, O);
- printPredicateOperand(MI, 4, O);
+ printSBitModifierOperand(MI, 6, STI, O);
+ printPredicateOperand(MI, 4, STI, O);
O << '\t';
printRegName(O, Dst.getReg());
@@ -137,8 +140,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
const MCOperand &MO2 = MI->getOperand(2);
O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()));
- printSBitModifierOperand(MI, 5, O);
- printPredicateOperand(MI, 3, O);
+ printSBitModifierOperand(MI, 5, STI, O);
+ printPredicateOperand(MI, 3, STI, O);
O << '\t';
printRegName(O, Dst.getReg());
@@ -150,10 +153,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- O << ", "
- << markup("<imm:")
- << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()))
- << markup(">");
+ O << ", " << markup("<imm:") << "#"
+ << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) << markup(">");
printAnnotation(O, Annot);
return;
}
@@ -164,11 +165,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
// Should only print PUSH if there are at least two registers in the list.
O << '\t' << "push";
- printPredicateOperand(MI, 2, O);
+ printPredicateOperand(MI, 2, STI, O);
if (Opcode == ARM::t2STMDB_UPD)
O << ".w";
O << '\t';
- printRegisterList(MI, 4, O);
+ printRegisterList(MI, 4, STI, O);
printAnnotation(O, Annot);
return;
} else
@@ -178,7 +179,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (MI->getOperand(2).getReg() == ARM::SP &&
MI->getOperand(3).getImm() == -4) {
O << '\t' << "push";
- printPredicateOperand(MI, 4, O);
+ printPredicateOperand(MI, 4, STI, O);
O << "\t{";
printRegName(O, MI->getOperand(1).getReg());
O << "}";
@@ -193,11 +194,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
// Should only print POP if there are at least two registers in the list.
O << '\t' << "pop";
- printPredicateOperand(MI, 2, O);
+ printPredicateOperand(MI, 2, STI, O);
if (Opcode == ARM::t2LDMIA_UPD)
O << ".w";
O << '\t';
- printRegisterList(MI, 4, O);
+ printRegisterList(MI, 4, STI, O);
printAnnotation(O, Annot);
return;
} else
@@ -207,7 +208,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (MI->getOperand(2).getReg() == ARM::SP &&
MI->getOperand(4).getImm() == 4) {
O << '\t' << "pop";
- printPredicateOperand(MI, 5, O);
+ printPredicateOperand(MI, 5, STI, O);
O << "\t{";
printRegName(O, MI->getOperand(0).getReg());
O << "}";
@@ -221,9 +222,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
case ARM::VSTMDDB_UPD:
if (MI->getOperand(0).getReg() == ARM::SP) {
O << '\t' << "vpush";
- printPredicateOperand(MI, 2, O);
+ printPredicateOperand(MI, 2, STI, O);
O << '\t';
- printRegisterList(MI, 4, O);
+ printRegisterList(MI, 4, STI, O);
printAnnotation(O, Annot);
return;
} else
@@ -234,9 +235,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
case ARM::VLDMDIA_UPD:
if (MI->getOperand(0).getReg() == ARM::SP) {
O << '\t' << "vpop";
- printPredicateOperand(MI, 2, O);
+ printPredicateOperand(MI, 2, STI, O);
O << '\t';
- printRegisterList(MI, 4, O);
+ printRegisterList(MI, 4, STI, O);
printAnnotation(O, Annot);
return;
} else
@@ -252,12 +253,13 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << "\tldm";
- printPredicateOperand(MI, 1, O);
+ printPredicateOperand(MI, 1, STI, O);
O << '\t';
printRegName(O, BaseReg);
- if (Writeback) O << "!";
+ if (Writeback)
+ O << "!";
O << ", ";
- printRegisterList(MI, 3, O);
+ printRegisterList(MI, 3, STI, O);
printAnnotation(O, Annot);
return;
}
@@ -268,9 +270,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// GPRs. However, when decoding them, the two GRPs cannot be automatically
// expressed as a GPRPair, so we have to manually merge them.
// FIXME: We would really like to be able to tablegen'erate this.
- case ARM::LDREXD: case ARM::STREXD:
- case ARM::LDAEXD: case ARM::STLEXD: {
- const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
+ case ARM::LDREXD:
+ case ARM::STREXD:
+ case ARM::LDAEXD:
+ case ARM::STLEXD: {
+ const MCRegisterClass &MRC = MRI.getRegClass(ARM::GPRRegClassID);
bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD;
unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
if (MRC.contains(Reg)) {
@@ -280,28 +284,27 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (isStore)
NewMI.addOperand(MI->getOperand(0));
- NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0,
- &MRI.getRegClass(ARM::GPRPairRegClassID)));
+ NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(
+ Reg, ARM::gsub_0, &MRI.getRegClass(ARM::GPRPairRegClassID)));
NewMI.addOperand(NewReg);
// Copy the rest operands into NewMI.
- for(unsigned i= isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
+ for (unsigned i = isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
NewMI.addOperand(MI->getOperand(i));
- printInstruction(&NewMI, O);
+ printInstruction(&NewMI, STI, O);
return;
}
break;
}
- // B9.3.3 ERET (Thumb)
- // For a target that has Virtualization Extensions, ERET is the preferred
- // disassembly of SUBS PC, LR, #0
+ // B9.3.3 ERET (Thumb)
+ // For a target that has Virtualization Extensions, ERET is the preferred
+ // disassembly of SUBS PC, LR, #0
case ARM::t2SUBS_PC_LR: {
- if (MI->getNumOperands() == 3 &&
- MI->getOperand(0).isImm() &&
+ if (MI->getNumOperands() == 3 && MI->getOperand(0).isImm() &&
MI->getOperand(0).getImm() == 0 &&
- (getAvailableFeatures() & ARM::FeatureVirtualization)) {
+ (STI.getFeatureBits() & ARM::FeatureVirtualization)) {
O << "\teret";
- printPredicateOperand(MI, 1, O);
+ printPredicateOperand(MI, 1, STI, O);
printAnnotation(O, Annot);
return;
}
@@ -309,20 +312,18 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
}
}
- printInstruction(MI, O);
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
}
void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
unsigned Reg = Op.getReg();
printRegName(O, Reg);
} else if (Op.isImm()) {
- O << markup("<imm:")
- << '#' << formatImm(Op.getImm())
- << markup(">");
+ O << markup("<imm:") << '#' << formatImm(Op.getImm()) << markup(">");
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
const MCExpr *Expr = Op.getExpr();
@@ -354,6 +355,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
if (MO1.isExpr()) {
@@ -370,13 +372,9 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << markup("<imm:")
- << "#-" << formatImm(-OffImm)
- << markup(">");
+ O << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
} else {
- O << markup("<imm:")
- << "#" << formatImm(OffImm)
- << markup(">");
+ O << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
}
O << "]" << markup(">");
}
@@ -387,10 +385,11 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
// REG REG 0,SH_OPC - e.g. R5, ROR R3
// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
- const MCOperand &MO3 = MI->getOperand(OpNum+2);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+ const MCOperand &MO3 = MI->getOperand(OpNum + 2);
printRegName(O, MO1.getReg());
@@ -406,9 +405,10 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
printRegName(O, MO1.getReg());
@@ -417,28 +417,25 @@ void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
}
-
//===--------------------------------------------------------------------===//
// Addressing Mode #2
//===--------------------------------------------------------------------===//
void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
- const MCOperand &MO3 = MI->getOperand(Op+2);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+ const MCOperand &MO3 = MI->getOperand(Op + 2);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (!MO2.getReg()) {
if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0.
- O << ", "
- << markup("<imm:")
- << "#"
+ O << ", " << markup("<imm:") << "#"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << ARM_AM::getAM2Offset(MO3.getImm())
- << markup(">");
+ << ARM_AM::getAM2Offset(MO3.getImm()) << markup(">");
}
O << "]" << markup(">");
return;
@@ -454,9 +451,10 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
}
void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
O << ", ";
@@ -465,9 +463,10 @@ void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
}
void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
O << ", ";
@@ -476,35 +475,35 @@ void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
}
void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, STI, O);
return;
}
#ifndef NDEBUG
- const MCOperand &MO3 = MI->getOperand(Op+2);
+ const MCOperand &MO3 = MI->getOperand(Op + 2);
unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
- assert(IdxMode != ARMII::IndexModePost &&
- "Should be pre or offset index op");
+ assert(IdxMode != ARMII::IndexModePost && "Should be pre or offset index op");
#endif
- printAM2PreOrOffsetIndexOp(MI, Op, O);
+ printAM2PreOrOffsetIndexOp(MI, Op, STI, O);
}
void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
if (!MO1.getReg()) {
unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
- O << markup("<imm:")
- << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
- << ImmOffs
+ O << markup("<imm:") << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs
<< markup(">");
return;
}
@@ -524,8 +523,8 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
raw_ostream &O,
bool AlwaysPrintImm0) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
- const MCOperand &MO3 = MI->getOperand(Op+2);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+ const MCOperand &MO3 = MI->getOperand(Op + 2);
O << markup("<mem:") << '[';
printRegName(O, MO1.getReg());
@@ -537,16 +536,12 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
return;
}
- //If the op is sub we have to print the immediate even if it is 0
+ // If the op is sub we have to print the immediate even if it is 0
unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) {
- O << ", "
- << markup("<imm:")
- << "#"
- << ARM_AM::getAddrOpcStr(op)
- << ImmOffs
+ O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(op) << ImmOffs
<< markup(">");
}
O << ']' << markup(">");
@@ -554,10 +549,11 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- if (!MO1.isReg()) { // For label symbolic references.
- printOperand(MI, Op, O);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, Op, STI, O);
return;
}
@@ -569,9 +565,10 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
if (MO1.getReg()) {
O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()));
@@ -580,56 +577,56 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
}
unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
- O << markup("<imm:")
- << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
+ O << markup("<imm:") << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
<< markup(">");
}
-void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
- unsigned OpNum,
+void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- O << markup("<imm:")
- << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
+ O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
<< markup(">");
}
void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
O << (MO2.getImm() ? "" : "-");
printRegName(O, MO1.getReg());
}
-void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- O << markup("<imm:")
- << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
+ O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
<< markup(">");
}
-
void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum)
- .getImm());
+ ARM_AM::AMSubMode Mode =
+ ARM_AM::getAM4SubMode(MI->getOperand(OpNum).getImm());
O << ARM_AM::getAMSubModeStr(Mode);
}
template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, OpNum, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, STI, O);
return;
}
@@ -639,20 +636,17 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm());
if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
- O << ", "
- << markup("<imm:")
- << "#"
- << ARM_AM::getAddrOpcStr(Op)
- << ImmOffs * 4
- << markup(">");
+ O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(Op)
+ << ImmOffs * 4 << markup(">");
}
O << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
@@ -663,6 +657,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
O << markup("<mem:") << "[";
@@ -672,6 +667,7 @@ void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.getReg() == 0)
@@ -684,49 +680,47 @@ void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
uint32_t v = ~MO.getImm();
int32_t lsb = countTrailingZeros(v);
- int32_t width = (32 - countLeadingZeros (v)) - lsb;
+ int32_t width = (32 - countLeadingZeros(v)) - lsb;
assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
- O << markup("<imm:") << '#' << lsb << markup(">")
- << ", "
- << markup("<imm:") << '#' << width << markup(">");
+ O << markup("<imm:") << '#' << lsb << markup(">") << ", " << markup("<imm:")
+ << '#' << width << markup(">");
}
void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned val = MI->getOperand(OpNum).getImm();
- O << ARM_MB::MemBOptToString(val, (getAvailableFeatures() & ARM::HasV8Ops));
+ O << ARM_MB::MemBOptToString(val, (STI.getFeatureBits() & ARM::HasV8Ops));
}
void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned val = MI->getOperand(OpNum).getImm();
O << ARM_ISB::InstSyncBOptToString(val);
}
void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned ShiftOp = MI->getOperand(OpNum).getImm();
bool isASR = (ShiftOp & (1 << 5)) != 0;
unsigned Amt = ShiftOp & 0x1f;
if (isASR) {
- O << ", asr "
- << markup("<imm:")
- << "#" << (Amt == 0 ? 32 : Amt)
- << markup(">");
- }
- else if (Amt) {
- O << ", lsl "
- << markup("<imm:")
- << "#" << Amt
+ O << ", asr " << markup("<imm:") << "#" << (Amt == 0 ? 32 : Amt)
<< markup(">");
+ } else if (Amt) {
+ O << ", lsl " << markup("<imm:") << "#" << Amt << markup(">");
}
}
void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
if (Imm == 0)
@@ -736,6 +730,7 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
// A shift amount of 32 is encoded as 0.
@@ -746,16 +741,19 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "{";
for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
- if (i != OpNum) O << ", ";
+ if (i != OpNum)
+ O << ", ";
printRegName(O, MI->getOperand(i).getReg());
}
O << "}";
}
void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0));
@@ -763,8 +761,8 @@ void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1));
}
-
void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
if (Op.getImm())
@@ -774,16 +772,16 @@ void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
O << ARM_PROC::IModToString(Op.getImm());
}
void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
unsigned IFlags = Op.getImm();
- for (int i=2; i >= 0; --i)
+ for (int i = 2; i >= 0; --i)
if (IFlags & (1 << i))
O << ARM_PROC::IFlagsToString(1 << i);
@@ -792,11 +790,12 @@ void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
unsigned SpecRegRBit = Op.getImm() >> 4;
unsigned Mask = Op.getImm() & 0xf;
- uint64_t FeatureBits = getAvailableFeatures();
+ uint64_t FeatureBits = STI.getFeatureBits();
if (FeatureBits & ARM::FeatureMClass) {
unsigned SYSm = Op.getImm();
@@ -805,14 +804,30 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
// For writes, handle extended mask bits if the DSP extension is present.
if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::FeatureDSPThumb2)) {
switch (SYSm) {
- case 0x400: O << "apsr_g"; return;
- case 0xc00: O << "apsr_nzcvqg"; return;
- case 0x401: O << "iapsr_g"; return;
- case 0xc01: O << "iapsr_nzcvqg"; return;
- case 0x402: O << "eapsr_g"; return;
- case 0xc02: O << "eapsr_nzcvqg"; return;
- case 0x403: O << "xpsr_g"; return;
- case 0xc03: O << "xpsr_nzcvqg"; return;
+ case 0x400:
+ O << "apsr_g";
+ return;
+ case 0xc00:
+ O << "apsr_nzcvqg";
+ return;
+ case 0x401:
+ O << "iapsr_g";
+ return;
+ case 0xc01:
+ O << "iapsr_nzcvqg";
+ return;
+ case 0x402:
+ O << "eapsr_g";
+ return;
+ case 0xc02:
+ O << "eapsr_nzcvqg";
+ return;
+ case 0x403:
+ O << "xpsr_g";
+ return;
+ case 0xc03:
+ O << "xpsr_nzcvqg";
+ return;
}
}
@@ -823,29 +838,66 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
// ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an
// alias for MSR APSR_nzcvq.
switch (SYSm) {
- case 0: O << "apsr_nzcvq"; return;
- case 1: O << "iapsr_nzcvq"; return;
- case 2: O << "eapsr_nzcvq"; return;
- case 3: O << "xpsr_nzcvq"; return;
+ case 0:
+ O << "apsr_nzcvq";
+ return;
+ case 1:
+ O << "iapsr_nzcvq";
+ return;
+ case 2:
+ O << "eapsr_nzcvq";
+ return;
+ case 3:
+ O << "xpsr_nzcvq";
+ return;
}
}
switch (SYSm) {
- default: llvm_unreachable("Unexpected mask value!");
- case 0: O << "apsr"; return;
- case 1: O << "iapsr"; return;
- case 2: O << "eapsr"; return;
- case 3: O << "xpsr"; return;
- case 5: O << "ipsr"; return;
- case 6: O << "epsr"; return;
- case 7: O << "iepsr"; return;
- case 8: O << "msp"; return;
- case 9: O << "psp"; return;
- case 16: O << "primask"; return;
- case 17: O << "basepri"; return;
- case 18: O << "basepri_max"; return;
- case 19: O << "faultmask"; return;
- case 20: O << "control"; return;
+ default:
+ llvm_unreachable("Unexpected mask value!");
+ case 0:
+ O << "apsr";
+ return;
+ case 1:
+ O << "iapsr";
+ return;
+ case 2:
+ O << "eapsr";
+ return;
+ case 3:
+ O << "xpsr";
+ return;
+ case 5:
+ O << "ipsr";
+ return;
+ case 6:
+ O << "epsr";
+ return;
+ case 7:
+ O << "iepsr";
+ return;
+ case 8:
+ O << "msp";
+ return;
+ case 9:
+ O << "psp";
+ return;
+ case 16:
+ O << "primask";
+ return;
+ case 17:
+ O << "basepri";
+ return;
+ case 18:
+ O << "basepri_max";
+ return;
+ case 19:
+ O << "faultmask";
+ return;
+ case 20:
+ O << "control";
+ return;
}
}
@@ -854,10 +906,17 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
O << "APSR_";
switch (Mask) {
- default: llvm_unreachable("Unexpected mask value!");
- case 4: O << "g"; return;
- case 8: O << "nzcvq"; return;
- case 12: O << "nzcvqg"; return;
+ default:
+ llvm_unreachable("Unexpected mask value!");
+ case 4:
+ O << "g";
+ return;
+ case 8:
+ O << "nzcvq";
+ return;
+ case 12:
+ O << "nzcvqg";
+ return;
}
}
@@ -868,14 +927,19 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
if (Mask) {
O << '_';
- if (Mask & 8) O << 'f';
- if (Mask & 4) O << 's';
- if (Mask & 2) O << 'x';
- if (Mask & 1) O << 'c';
+ if (Mask & 8)
+ O << 'f';
+ if (Mask & 4)
+ O << 's';
+ if (Mask & 2)
+ O << 'x';
+ if (Mask & 1)
+ O << 'c';
}
}
void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
uint32_t Banked = MI->getOperand(OpNum).getImm();
uint32_t R = (Banked & 0x20) >> 5;
@@ -886,25 +950,40 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
if (R) {
O << "SPSR_";
- switch(SysM) {
- case 0x0e: O << "fiq"; return;
- case 0x10: O << "irq"; return;
- case 0x12: O << "svc"; return;
- case 0x14: O << "abt"; return;
- case 0x16: O << "und"; return;
- case 0x1c: O << "mon"; return;
- case 0x1e: O << "hyp"; return;
- default: llvm_unreachable("Invalid banked SPSR register");
+ switch (SysM) {
+ case 0x0e:
+ O << "fiq";
+ return;
+ case 0x10:
+ O << "irq";
+ return;
+ case 0x12:
+ O << "svc";
+ return;
+ case 0x14:
+ O << "abt";
+ return;
+ case 0x16:
+ O << "und";
+ return;
+ case 0x1c:
+ O << "mon";
+ return;
+ case 0x1e:
+ O << "hyp";
+ return;
+ default:
+ llvm_unreachable("Invalid banked SPSR register");
}
}
assert(!R && "should have dealt with SPSR regs");
const char *RegNames[] = {
- "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", "",
- "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", "lr_fiq", "",
- "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", "sp_abt", "lr_und", "sp_und",
- "", "", "", "", "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"
- };
+ "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr",
+ "", "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq",
+ "lr_fiq", "", "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt",
+ "sp_abt", "lr_und", "sp_und", "", "", "", "",
+ "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"};
const char *Name = RegNames[SysM];
assert(Name[0] && "invalid banked register operand");
@@ -912,6 +991,7 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
// Handle the undefined 15 CC value here for printing so we don't abort().
@@ -923,12 +1003,14 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
O << ARMCondCodeToString(CC);
}
void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
if (MI->getOperand(OpNum).getReg()) {
assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
@@ -938,33 +1020,38 @@ void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "p" << MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "c" << MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "{" << MI->getOperand(OpNum).getImm() << "}";
}
void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
}
-template<unsigned scale>
+template <unsigned scale>
void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.isExpr()) {
@@ -985,25 +1072,26 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- O << markup("<imm:")
- << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
+ O << markup("<imm:") << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
<< markup(">");
}
void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- O << markup("<imm:")
- << "#" << formatImm((Imm == 0 ? 32 : Imm))
+ O << markup("<imm:") << "#" << formatImm((Imm == 0 ? 32 : Imm))
<< markup(">");
}
void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// (3 - the number of trailing zeros) is the number of then / else.
unsigned Mask = MI->getOperand(OpNum).getImm();
- unsigned Firstcond = MI->getOperand(OpNum-1).getImm();
+ unsigned Firstcond = MI->getOperand(OpNum - 1).getImm();
unsigned CondBit0 = Firstcond & 1;
unsigned NumTZ = countTrailingZeros(Mask);
assert(NumTZ <= 3 && "Invalid IT mask!");
@@ -1017,12 +1105,13 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op + 1);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, STI, O);
return;
}
@@ -1037,22 +1126,21 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O,
unsigned Scale) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op + 1);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, STI, O);
return;
}
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (unsigned ImmOffs = MO2.getImm()) {
- O << ", "
- << markup("<imm:")
- << "#" << formatImm(ImmOffs * Scale)
+ O << ", " << markup("<imm:") << "#" << formatImm(ImmOffs * Scale)
<< markup(">");
}
O << "]" << markup(">");
@@ -1060,25 +1148,29 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printThumbAddrModeImm5SOperand(MI, Op, O, 1);
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 1);
}
void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printThumbAddrModeImm5SOperand(MI, Op, O, 2);
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 2);
}
void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4);
}
void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4);
}
// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
@@ -1086,9 +1178,10 @@ void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
// REG 0 0 - e.g. R5
// REG IMM, SH_OPC - e.g. R5, LSL #3
void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
unsigned Reg = MO1.getReg();
printRegName(O, Reg);
@@ -1101,12 +1194,13 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, OpNum, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, STI, O);
return;
}
@@ -1119,26 +1213,20 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", "
- << markup("<imm:")
- << "#-" << formatImm(-OffImm)
- << markup(">");
- }
- else if (AlwaysPrintImm0 || OffImm > 0) {
- O << ", "
- << markup("<imm:")
- << "#" << formatImm(OffImm)
- << markup(">");
+ O << ", " << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
+ } else if (AlwaysPrintImm0 || OffImm > 0) {
+ O << ", " << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
}
O << "]" << markup(">");
}
-template<bool AlwaysPrintImm0>
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
@@ -1149,28 +1237,23 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", "
- << markup("<imm:")
- << "#-" << -OffImm
- << markup(">");
+ O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
} else if (AlwaysPrintImm0 || OffImm > 0) {
- O << ", "
- << markup("<imm:")
- << "#" << OffImm
- << markup(">");
+ O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
}
O << "]" << markup(">");
}
-template<bool AlwaysPrintImm0>
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- if (!MO1.isReg()) { // For label symbolic references.
- printOperand(MI, OpNum, O);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, OpNum, STI, O);
return;
}
@@ -1186,39 +1269,31 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", "
- << markup("<imm:")
- << "#-" << -OffImm
- << markup(">");
+ O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
} else if (AlwaysPrintImm0 || OffImm > 0) {
- O << ", "
- << markup("<imm:")
- << "#" << OffImm
- << markup(">");
+ O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
}
O << "]" << markup(">");
}
-void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (MO2.getImm()) {
- O << ", "
- << markup("<imm:")
- << "#" << formatImm(MO2.getImm() * 4)
+ O << ", " << markup("<imm:") << "#" << formatImm(MO2.getImm() * 4)
<< markup(">");
}
O << "]" << markup(">");
}
-void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm();
O << ", " << markup("<imm:");
@@ -1231,9 +1306,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
O << markup(">");
}
-void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm();
@@ -1251,10 +1326,11 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
- const MCOperand &MO3 = MI->getOperand(OpNum+2);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+ const MCOperand &MO3 = MI->getOperand(OpNum + 2);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
@@ -1266,71 +1342,74 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
unsigned ShAmt = MO3.getImm();
if (ShAmt) {
assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
- O << ", lsl "
- << markup("<imm:")
- << "#" << ShAmt
- << markup(">");
+ O << ", lsl " << markup("<imm:") << "#" << ShAmt << markup(">");
}
O << "]" << markup(">");
}
void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
- O << markup("<imm:")
- << '#' << ARM_AM::getFPImmFloat(MO.getImm())
+ O << markup("<imm:") << '#' << ARM_AM::getFPImmFloat(MO.getImm())
<< markup(">");
}
void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned EncodedImm = MI->getOperand(OpNum).getImm();
unsigned EltBits;
uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
- O << markup("<imm:")
- << "#0x";
+ O << markup("<imm:") << "#0x";
O.write_hex(Val);
O << markup(">");
}
void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- O << markup("<imm:")
- << "#" << formatImm(Imm + 1)
- << markup(">");
+ O << markup("<imm:") << "#" << formatImm(Imm + 1) << markup(">");
}
void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
if (Imm == 0)
return;
- O << ", ror "
- << markup("<imm:")
- << "#";
+ O << ", ror " << markup("<imm:") << "#";
switch (Imm) {
- default: assert (0 && "illegal ror immediate!");
- case 1: O << "8"; break;
- case 2: O << "16"; break;
- case 3: O << "24"; break;
+ default:
+ assert(0 && "illegal ror immediate!");
+ case 1:
+ O << "8";
+ break;
+ case 2:
+ O << "16";
+ break;
+ case 3:
+ O << "24";
+ break;
}
O << markup(">");
}
void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
MCOperand Op = MI->getOperand(OpNum);
// Support for fixups (MCFixup)
if (Op.isExpr())
- return printOperand(MI, OpNum, O);
+ return printOperand(MI, OpNum, STI, O);
unsigned Bits = Op.getImm() & 0xFF;
unsigned Rot = (Op.getImm() & 0xF00) >> 7;
- bool PrintUnsigned = false;
- switch (MI->getOpcode()){
+ bool PrintUnsigned = false;
+ switch (MI->getOpcode()) {
case ARM::MOVi:
// Movs to PC should be treated unsigned
PrintUnsigned = (MI->getOperand(OpNum - 1).getReg() == ARM::PC);
@@ -1354,36 +1433,30 @@ void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
}
// Explicit #bits, #rot implied
- O << "#"
- << markup("<imm:")
- << Bits
- << markup(">")
- << ", #"
- << markup("<imm:")
- << Rot
- << markup(">");
+ O << "#" << markup("<imm:") << Bits << markup(">") << ", #" << markup("<imm:")
+ << Rot << markup(">");
}
void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << markup("<imm:")
- << "#" << 16 - MI->getOperand(OpNum).getImm()
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ O << markup("<imm:") << "#" << 16 - MI->getOperand(OpNum).getImm()
<< markup(">");
}
void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << markup("<imm:")
- << "#" << 32 - MI->getOperand(OpNum).getImm()
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ O << markup("<imm:") << "#" << 32 - MI->getOperand(OpNum).getImm()
<< markup(">");
}
void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "{";
printRegName(O, MI->getOperand(OpNum).getReg());
@@ -1391,7 +1464,8 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
@@ -1402,8 +1476,8 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
O << "}";
}
-void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
- unsigned OpNum,
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
@@ -1416,6 +1490,7 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
}
void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
@@ -1430,6 +1505,7 @@ void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
@@ -1447,6 +1523,7 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "{";
printRegName(O, MI->getOperand(OpNum).getReg());
@@ -1455,6 +1532,7 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
@@ -1468,6 +1546,7 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
@@ -1482,8 +1561,9 @@ void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
}
void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
@@ -1498,9 +1578,9 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
O << "[]}";
}
-void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListTwoSpacedAllLanes(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
@@ -1511,24 +1591,24 @@ void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
O << "[]}";
}
-void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListThreeSpacedAllLanes(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
O << "{";
printRegName(O, MI->getOperand(OpNum).getReg());
- O << "[], ";
+ O << "[], ";
printRegName(O, MI->getOperand(OpNum).getReg() + 2);
O << "[], ";
printRegName(O, MI->getOperand(OpNum).getReg() + 4);
O << "[]}";
}
-void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListFourSpacedAllLanes(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
@@ -1545,6 +1625,7 @@ void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
@@ -1558,9 +1639,9 @@ void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
O << "}";
}
-void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index f179e01..3927c9f 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -24,146 +24,207 @@ class MCOperand;
class ARMInstPrinter : public MCInstPrinter {
public:
ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
-
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
- void printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSORegImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- void printAddrModeTBB(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAddrModeTBH(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+
+ void printSORegRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSORegImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printAddrModeTBB(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrModeTBH(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode2Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
template <bool AlwaysPrintImm0>
- void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
bool AlwaysPrintImm0);
void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
template <bool AlwaysPrintImm0>
- void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
- void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printInstSyncBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printInstSyncBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
template <unsigned scale>
- void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbSRImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbITMask(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O, unsigned Scale);
void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- template<bool AlwaysPrintImm0>
+ void printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- template<bool AlwaysPrintImm0>
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- template<bool AlwaysPrintImm0>
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
-
- void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printBankedRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printSetendOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCPSIMod(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCPSIFlag(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBankedRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printRegisterList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printRotImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printPCLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFBits16(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFBits32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListOne(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListTwo(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListThree(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListFour(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
};
} // end namespace llvm
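Note on the header change above: the printer no longer captures a subtarget at construction; instead, every print method receives the MCSubtargetInfo for the instruction being printed. A minimal caller sketch (illustrative only, not part of the patch; variable names are placeholders):

    ARMInstPrinter Printer(MAI, MII, MRI);            // no STI at construction any more
    Printer.printInst(&Inst, OS, /*Annot=*/"", STI);  // STI supplied per printInst call

This matches the updated createARMMCInstPrinter factory further down, which likewise drops its MCSubtargetInfo parameter.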
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 0b2e3b0..590d72f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -153,18 +153,20 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) {
}
} // end anonymous namespace
-static unsigned getRelaxedOpcode(unsigned Op) {
+unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const {
+ bool HasThumb2 = STI->getFeatureBits() & ARM::FeatureThumb2;
+
switch (Op) {
default:
return Op;
case ARM::tBcc:
- return ARM::t2Bcc;
+ return HasThumb2 ? (unsigned)ARM::t2Bcc : Op;
case ARM::tLDRpci:
- return ARM::t2LDRpci;
+ return HasThumb2 ? (unsigned)ARM::t2LDRpci : Op;
case ARM::tADR:
- return ARM::t2ADR;
+ return HasThumb2 ? (unsigned)ARM::t2ADR : Op;
case ARM::tB:
- return ARM::t2B;
+ return HasThumb2 ? (unsigned)ARM::t2B : Op;
case ARM::tCBZ:
return ARM::tHINT;
case ARM::tCBNZ:
@@ -589,7 +591,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
(unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
(unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
if (A) {
- const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ const MCSymbol &Sym = A->getSymbol();
if (Asm.isThumbFunc(&Sym))
Value |= 1;
}
@@ -598,7 +600,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// the basic blocks of the same function. Thus, we would like to resolve
// the offset when the destination has the same MCFragment.
if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
- const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ const MCSymbol &Sym = A->getSymbol();
const MCSymbolData &SymData = Asm.getSymbolData(Sym);
IsResolved = (SymData.getFragment() == DF);
}
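Making getRelaxedOpcode a member (hunk above) lets it consult the subtarget, so Thumb1 opcodes are only widened to their Thumb2 forms when FeatureThumb2 is present. An illustrative consequence (hypothetical cores, not part of the patch):

    // Subtarget without ARM::FeatureThumb2 (e.g. an M0-class core):
    //   getRelaxedOpcode(ARM::tB)  -> ARM::tB    (kept; no encodable wider form)
    // Subtarget with Thumb2 (e.g. an M3-class core):
    //   getRelaxedOpcode(ARM::tB)  -> ARM::t2B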
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index f4f1082..4fa8c79 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -47,6 +47,8 @@ public:
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
+ unsigned getRelaxedOpcode(unsigned Op) const;
+
bool mayNeedRelaxation(const MCInst &Inst) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index 3bd7ab7..ebef789 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -23,7 +23,7 @@ public:
HasDataInCodeSupport = true;
}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM,
Subtype);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 4efd325..263c4c4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -18,7 +18,7 @@ public:
ARMAsmBackendELF(const Target &T, StringRef TT, uint8_t OSABI, bool IsLittle)
: ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMELFObjectWriter(OS, OSABI, isLittle());
}
};
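The switch from raw_ostream to raw_pwrite_stream in these createObjectWriter overrides reflects that object writers patch bytes they have already emitted (section sizes, header fields) at absolute offsets, which a plain raw_ostream does not guarantee. A hedged usage sketch (not from the patch; the output file name and OSABI value are placeholders, and raw_fd_ostream qualifies because it derives from raw_pwrite_stream; assumes the usual LLVM MC/Support headers):

    std::error_code EC;
    raw_fd_ostream OS("out.o", EC, sys::fs::F_None);  // seekable, pwrite-capable stream
    MCObjectWriter *OW =
        createARMELFObjectWriter(OS, /*OSABI=*/ELF::ELFOSABI_NONE, /*IsLittleEndian=*/true);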
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
index 33be347..f2c4358 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -17,7 +17,7 @@ class ARMAsmBackendWinCOFF : public ARMAsmBackend {
public:
ARMAsmBackendWinCOFF(const Target &T, StringRef Triple)
: ARMAsmBackend(T, Triple, true) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
}
};
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index a821a6b..f4fedee 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -32,7 +32,7 @@ namespace {
public:
ARMELFObjectWriter(uint8_t OSABI);
- virtual ~ARMELFObjectWriter();
+ ~ARMELFObjectWriter() override;
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
@@ -81,7 +81,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
unsigned Type = 0;
if (IsPCRel) {
switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("Unimplemented");
+ default:
+ report_fatal_error("unsupported relocation on symbol");
+ return ELF::R_ARM_NONE;
case FK_Data_4:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
@@ -147,7 +149,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
}
} else {
switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
+ default:
+ report_fatal_error("unsupported relocation on symbol");
+ return ELF::R_ARM_NONE;
case FK_Data_1:
switch (Modifier) {
default: llvm_unreachable("unsupported Modifier");
@@ -247,7 +251,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
return Type;
}
-MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createARMELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI,
bool IsLittleEndian) {
MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 9648ffa..e7c777e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -472,7 +472,7 @@ class ARMELFStreamer : public MCELFStreamer {
public:
friend class ARMTargetELFStreamer;
- ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool IsThumb)
: MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb),
MappingSymbolCounter(0), LastEMS(EMS_None) {
@@ -1083,14 +1083,13 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
}
// Get .ARM.extab or .ARM.exidx section
- const MCSectionELF *EHSection = nullptr;
- if (const MCSymbol *Group = FnSection.getGroup()) {
- EHSection =
- getContext().getELFSection(EHSecName, Type, Flags | ELF::SHF_GROUP,
- FnSection.getEntrySize(), Group->getName());
- } else {
- EHSection = getContext().getELFSection(EHSecName, Type, Flags);
- }
+ const MCSymbol *Group = FnSection.getGroup();
+ if (Group)
+ Flags |= ELF::SHF_GROUP;
+ const MCSectionELF *EHSection =
+ getContext().getELFSection(EHSecName, Type, Flags, 0, Group,
+ FnSection.getUniqueID(), nullptr, &FnSection);
+
assert(EHSection && "Failed to get the required EH section");
// Switch to .ARM.extab or .ARM.exidx section
@@ -1383,8 +1382,9 @@ MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
}
MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool IsThumb) {
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IsThumb) {
ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
// FIXME: This should eventually end up somewhere else where more
// intelligent flag decisions can be made. For now we are just maintaining
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index e48cabb..6b650f0 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -48,7 +48,7 @@ public:
: MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) {
}
- ~ARMMCCodeEmitter() {}
+ ~ARMMCCodeEmitter() override {}
bool isThumb(const MCSubtargetInfo &STI) const {
return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 7ff7f9a..daa8af2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -255,7 +255,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS.str();
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
else
ArchFS = FS;
}
@@ -310,27 +310,26 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
}
static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
- MCAsmBackend &MAB, raw_ostream &OS,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll) {
return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
T.getArch() == Triple::thumb);
}
static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS,
+ raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll,
bool DWARFMustBeAtTheEnd) {
return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd);
}
-static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+static MCInstPrinter *createARMMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new ARMInstPrinter(MAI, MII, MRI, STI);
+ return new ARMInstPrinter(MAI, MII, MRI);
return nullptr;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 7e9ba66..24ca567 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -34,6 +34,7 @@ class StringRef;
class Target;
class Triple;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheARMLETarget, TheThumbLETarget;
extern Target TheARMBETarget, TheThumbBETarget;
@@ -41,9 +42,8 @@ extern Target TheARMBETarget, TheThumbBETarget;
namespace ARM_MC {
std::string ParseARMTriple(StringRef TT, StringRef CPU);
- /// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance.
- /// This is exposed so Asm parser, etc. do not need to go through
- /// TargetRegistry.
+ /// Create a ARM MCSubtargetInfo instance. This is exposed so Asm parser, etc.
+ /// do not need to go through TargetRegistry.
MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS);
}
@@ -83,24 +83,23 @@ MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI
// Construct a PE/COFF machine code streamer which will generate a PE/COFF
// object file.
MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
-/// createARMELFObjectWriter - Construct an ELF Mach-O object writer.
-MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI,
+/// Construct an ELF Mach-O object writer.
+MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
bool IsLittleEndian);
-/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
-MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
+/// Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint32_t CPUType,
uint32_t CPUSubtype);
-/// createARMWinCOFFObjectWriter - Construct an ARM PE/COFF object writer.
-MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit);
+/// Construct an ARM PE/COFF object writer.
+MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit);
-/// createARMMachORelocationInfo - Construct ARM Mach-O relocation info.
+/// Construct ARM Mach-O relocation info.
MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
} // End llvm namespace
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 3187d36..b1f9b58 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -88,6 +88,7 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
case ARM::fixup_arm_ldst_pcrel_12:
case ARM::fixup_arm_pcrel_10:
case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_br:
return false;
// Handle 24-bit branch kinds.
@@ -101,12 +102,6 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
Log2Size = llvm::Log2_32(4);
return true;
- // Handle Thumb branches.
- case ARM::fixup_arm_thumb_br:
- RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22);
- Log2Size = llvm::Log2_32(2);
- return true;
-
case ARM::fixup_t2_uncondbranch:
case ARM::fixup_arm_thumb_bl:
case ARM::fixup_arm_thumb_blx:
@@ -477,9 +472,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint32_t CPUType,
+MCObjectWriter *llvm::createARMMachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
CPUType,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 2fd6445..166c04b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -26,7 +26,7 @@ public:
: MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) {
assert(!Is64Bit && "AArch64 support not yet implemented");
}
- virtual ~ARMWinCOFFObjectWriter() { }
+ ~ARMWinCOFFObjectWriter() override {}
unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsCrossSection,
@@ -82,7 +82,8 @@ bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
}
namespace llvm {
-MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit) {
+MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit) {
MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit);
return createWinCOFFObjectWriter(MOTW, OS);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index dc707dc..b993b1b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -16,8 +16,8 @@ namespace {
class ARMWinCOFFStreamer : public MCWinCOFFStreamer {
public:
ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE,
- raw_ostream &OS)
- : MCWinCOFFStreamer(C, AB, CE, OS) { }
+ raw_pwrite_stream &OS)
+ : MCWinCOFFStreamer(C, AB, CE, OS) {}
void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
void EmitThumbFunc(MCSymbol *Symbol) override;
@@ -38,7 +38,8 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
}
MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
- MCAsmBackend &MAB, raw_ostream &OS,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
MCCodeEmitter *Emitter,
bool RelaxAll) {
return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index b91b0e1..b2599fe 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -132,7 +132,7 @@ SDNode *BPFDAGToDAGISel::Select(SDNode *Node) {
}
case ISD::FrameIndex: {
- int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex();
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
EVT VT = Node->getValueType(0);
SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
unsigned Opc = BPF::MOV_rr;
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index d94416b..37f9164 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -63,11 +63,11 @@ public:
std::string Str;
raw_string_ostream OS(Str);
- if (DLoc.isUnknown() == false) {
- DILocation DIL(DLoc.getAsMDNode(Fn.getContext()));
- StringRef Filename = DIL.getFilename();
- unsigned Line = DIL.getLineNumber();
- unsigned Column = DIL.getColumnNumber();
+ if (DLoc) {
+ auto DIL = DLoc.get();
+ StringRef Filename = DIL->getFilename();
+ unsigned Line = DIL->getLine();
+ unsigned Column = DIL->getColumn();
OS << Filename << ':' << Line << ':' << Column << ' ';
}
@@ -137,7 +137,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
- setOperationAction(ISD::BSWAP, MVT::i64, Expand);
setOperationAction(ISD::CTTZ, MVT::i64, Custom);
setOperationAction(ISD::CTLZ, MVT::i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
@@ -538,12 +537,10 @@ SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- unsigned Opc = MI->getOpcode();
-
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- assert(Opc == BPF::Select && "Unexpected instr type to insert");
+ assert(MI->getOpcode() == BPF::Select && "Unexpected instr type to insert");
// To "insert" a SELECT instruction, we actually have to insert the diamond
// control-flow pattern. The incoming instruction knows the destination vreg
diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td
index 47001f0..26b2cfe 100644
--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@@ -231,8 +231,6 @@ class MOV_RI<string OpcodeStr>
let BPFSrc = 0; // BPF_K
let BPFClass = 7; // BPF_ALU64
}
-def MOV_rr : MOV_RR<"mov">;
-def MOV_ri : MOV_RI<"mov">;
class LD_IMM64<bits<4> Pseudo, string OpcodeStr>
: InstBPF<(outs GPR:$dst), (ins u64imm:$imm),
@@ -255,7 +253,35 @@ class LD_IMM64<bits<4> Pseudo, string OpcodeStr>
let size = 3; // BPF_DW
let BPFClass = 0; // BPF_LD
}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def LD_imm64 : LD_IMM64<0, "ld_64">;
+def MOV_rr : MOV_RR<"mov">;
+def MOV_ri : MOV_RI<"mov">;
+}
+
+def LD_pseudo
+ : InstBPF<(outs GPR:$dst), (ins i64imm:$pseudo, u64imm:$imm),
+ "ld_pseudo\t$dst, $pseudo, $imm",
+ [(set GPR:$dst, (int_bpf_pseudo imm:$pseudo, imm:$imm))]> {
+
+ bits<3> mode;
+ bits<2> size;
+ bits<4> dst;
+ bits<64> imm;
+ bits<4> pseudo;
+
+ let Inst{63-61} = mode;
+ let Inst{60-59} = size;
+ let Inst{51-48} = dst;
+ let Inst{55-52} = pseudo;
+ let Inst{47-32} = 0;
+ let Inst{31-0} = imm{31-0};
+
+ let mode = 0; // BPF_IMM
+ let size = 3; // BPF_DW
+ let BPFClass = 0; // BPF_LD
+}
// STORE instructions
class STORE<bits<2> SizeOp, string OpcodeStr, list<dag> Pattern>
@@ -461,6 +487,33 @@ def XADD64 : XADD<3, "xadd64", atomic_load_add_64>;
// undefined def XADD8 : XADD<2, "xadd8", atomic_load_add_8>;
}
+// bswap16, bswap32, bswap64
+class BSWAP<bits<32> SizeOp, string OpcodeStr, list<dag> Pattern>
+ : InstBPF<(outs GPR:$dst), (ins GPR:$src),
+ !strconcat(OpcodeStr, "\t$dst"),
+ Pattern> {
+ bits<4> op;
+ bits<1> BPFSrc;
+ bits<4> dst;
+ bits<32> imm;
+
+ let Inst{63-60} = op;
+ let Inst{59} = BPFSrc;
+ let Inst{51-48} = dst;
+ let Inst{31-0} = imm;
+
+ let op = 0xd; // BPF_END
+ let BPFSrc = 1; // BPF_TO_BE (TODO: use BPF_TO_LE for big-endian target)
+ let BPFClass = 4; // BPF_ALU
+ let imm = SizeOp;
+}
+
+let Constraints = "$dst = $src" in {
+def BSWAP16 : BSWAP<16, "bswap16", [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
+def BSWAP32 : BSWAP<32, "bswap32", [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
+def BSWAP64 : BSWAP<64, "bswap64", [(set GPR:$dst, (bswap GPR:$src))]>;
+}
+
let Defs = [R0, R1, R2, R3, R4, R5], Uses = [R6], hasSideEffects = 1,
hasExtraDefRegAllocReq = 1, hasExtraSrcRegAllocReq = 1, mayLoad = 1 in {
class LOAD_ABS<bits<2> SizeOp, string OpcodeStr, Intrinsic OpNode>
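A note on the BSWAP selection patterns above: bswap16 and bswap32 are matched as a 64-bit byte swap followed by a logical shift right, which leaves exactly the byte-swapped low halfword or word in the destination. A quick illustrative check in C++ (not part of the patch):

    #include <cstdint>
    uint64_t Src  = 0x1122334455667788ULL;
    uint64_t Swap = __builtin_bswap64(Src);  // 0x8877665544332211
    uint64_t R16  = Swap >> 48;              // 0x8877     == byte-swapped low 16 bits of Src
    uint64_t R32  = Swap >> 32;              // 0x88776655 == byte-swapped low 32 bits of Src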
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
index 3f09379..05f6d82 100644
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
+++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
#include "BPFGenAsmWriter.inc"
void BPFInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
printInstruction(MI, O);
printAnnotation(O, Annot);
}
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
index d7c2899..adcaff6 100644
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
+++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
@@ -25,7 +25,8 @@ public:
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = nullptr);
void printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 87c8077..8393135 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -31,7 +31,7 @@ public:
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override;
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
// No instruction requires relaxation
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
@@ -71,7 +71,7 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
*(uint16_t *)&Data[Fixup.getOffset() + 2] = (uint16_t)((Value - 8) / 8);
}
-MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_ostream &OS) const {
+MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
return createBPFELFObjectWriter(OS, 0);
}
}
diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 169a8a7..a5562c1 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -47,7 +47,7 @@ unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target,
}
}
-MCObjectWriter *llvm::createBPFELFObjectWriter(raw_ostream &OS, uint8_t OSABI) {
+MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) {
MCELFObjectTargetWriter *MOTW = new BPFELFObjectWriter(OSABI);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
}
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index 9c51d66..32d2ef5 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -76,9 +76,8 @@ unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI,
assert(MO.isExpr());
const MCExpr *Expr = MO.getExpr();
- MCExpr::ExprKind Kind = Expr->getKind();
- assert(Kind == MCExpr::SymbolRef);
+ assert(Expr->getKind() == MCExpr::SymbolRef);
if (MI.getOpcode() == BPF::JAL)
// func call name
@@ -125,7 +124,7 @@ void BPFMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// Keep track of the current byte being emitted
unsigned CurByte = 0;
- if (Opcode == BPF::LD_imm64) {
+ if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) {
uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
EmitByte(Value >> 56, CurByte, OS);
EmitByte(((Value >> 48) & 0xff), CurByte, OS);
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index fd04001..95f0b02 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -63,15 +63,16 @@ static MCCodeGenInfo *createBPFMCCodeGenInfo(StringRef TT, Reloc::Model RM,
static MCStreamer *createBPFMCStreamer(const Triple &T,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter,
bool RelaxAll) {
return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
}
-static MCInstPrinter *
-createBPFMCInstPrinter(const Target &T, unsigned SyntaxVariant,
- const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) {
+static MCInstPrinter *createBPFMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
return new BPFInstPrinter(MAI, MII, MRI);
return 0;
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 1fd2bec..ce08b7c 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -28,6 +28,7 @@ class MCSubtargetInfo;
class Target;
class StringRef;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheBPFTarget;
@@ -38,7 +39,7 @@ MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-MCObjectWriter *createBPFELFObjectWriter(raw_ostream &OS, uint8_t OSABI);
+MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
}
// Defines symbolic names for BPF registers. This defines a mapping from
diff --git a/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt b/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt
index 209d17c..8bca2e3 100644
--- a/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = BPFDesc
parent = BPF
-required_libraries = MC BPFAsmPrinter BPFInfo
+required_libraries = MC BPFAsmPrinter BPFInfo Support
add_to_library_groups = BPF
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index d0e2010..f1a7127 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -15,6 +15,7 @@
#include "CPPTargetMachine.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/config.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -91,6 +92,7 @@ namespace {
/// CppWriter - This class is the main chunk of code that converts an LLVM
/// module to a C++ translation unit.
class CppWriter : public ModulePass {
+ std::unique_ptr<formatted_raw_ostream> OutOwner;
formatted_raw_ostream &Out;
const Module *TheModule;
uint64_t uniqueNum;
@@ -105,8 +107,9 @@ namespace {
public:
static char ID;
- explicit CppWriter(formatted_raw_ostream &o) :
- ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
+ explicit CppWriter(std::unique_ptr<formatted_raw_ostream> o)
+ : ModulePass(ID), OutOwner(std::move(o)), Out(*OutOwner), uniqueNum(0),
+ is_inline(false), indent_level(0) {}
const char *getPassName() const override { return "C++ backend"; }
@@ -1721,7 +1724,7 @@ void CppWriter::printFunctionUses(const Function* F) {
// initializers.
if (GenerationType != GenFunction) {
nl(Out) << "// Global Variable Definitions"; nl(Out);
- for (const auto &GV : gvs) {
+ for (auto *GV : gvs) {
if (GlobalVariable *Var = dyn_cast<GlobalVariable>(GV))
printVariableBody(Var);
}
@@ -2146,13 +2149,12 @@ char CppWriter::ID = 0;
// External Interface declaration
//===----------------------------------------------------------------------===//
-bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- bool DisableVerify,
- AnalysisID StartAfter,
- AnalysisID StopAfter) {
- if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
- PM.add(new CppWriter(o));
+bool CPPTargetMachine::addPassesToEmitFile(
+ PassManagerBase &PM, raw_pwrite_stream &o, CodeGenFileType FileType,
+ bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) {
+ if (FileType != TargetMachine::CGFT_AssemblyFile)
+ return true;
+ auto FOut = llvm::make_unique<formatted_raw_ostream>(o);
+ PM.add(new CppWriter(std::move(FOut)));
return false;
}
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 678a932..02d705e 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -29,7 +29,7 @@ struct CPPTargetMachine : public TargetMachine {
: TargetMachine(T, "", TT, CPU, FS, Options) {}
public:
- bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out,
+ bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
CodeGenFileType FileType, bool DisableVerify,
AnalysisID StartAfter,
AnalysisID StopAfter) override;
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index c6ffb96..758ccc7 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_target(HexagonCodeGen
HexagonAsmPrinter.cpp
HexagonCFGOptimizer.cpp
HexagonCopyToCombine.cpp
+ HexagonExpandCondsets.cpp
HexagonExpandPredSpillCode.cpp
HexagonFixupHwLoops.cpp
HexagonFrameLowering.cpp
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 180762f..f0c81e0 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -222,21 +222,6 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
-static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- if (SyntaxVariant == 0)
- return(new HexagonInstPrinter(MAI, MII, MRI));
- else
- return nullptr;
-}
-
extern "C" void LLVMInitializeHexagonAsmPrinter() {
RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget);
-
- TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
- createHexagonMCInstPrinter);
}
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
new file mode 100644
index 0000000..37ed173
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -0,0 +1,1348 @@
+// Replace mux instructions with the corresponding legal instructions.
+// It is meant to work post-SSA, but still on virtual registers. It was
+// originally placed between register coalescing and machine instruction
+// scheduler.
+// In this place in the optimization sequence, live interval analysis had
+// been performed, and the live intervals should be preserved. A large part
+// of the code deals with preserving the liveness information.
+//
+// Liveness tracking aside, the main functionality of this pass is divided
+// into two steps. The first step is to replace an instruction
+// vreg0 = C2_mux vreg0, vreg1, vreg2
+// with a pair of conditional transfers
+// vreg0 = A2_tfrt vreg0, vreg1
+// vreg0 = A2_tfrf vreg0, vreg2
+// It is the intention that the execution of this pass could be terminated
+// after this step, and the code generated would be functionally correct.
+//
+// If the uses of the source values vreg1 and vreg2 are kills, and their
+// definitions are predicable, then in the second step, the conditional
+// transfers will be rewritten as predicated instructions. E.g.
+// vreg0 = A2_or vreg1, vreg2
+// vreg3 = A2_tfrt vreg99, vreg0<kill>
+// will be rewritten as
+// vreg3 = A2_port vreg99, vreg1, vreg2
+//
+// This replacement has two variants: "up" and "down". Consider this case:
+// vreg0 = A2_or vreg1, vreg2
+// ... [intervening instructions] ...
+// vreg3 = A2_tfrt vreg99, vreg0<kill>
+// variant "up":
+// vreg3 = A2_port vreg99, vreg1, vreg2
+// ... [intervening instructions, vreg0->vreg3] ...
+// [deleted]
+// variant "down":
+// [deleted]
+// ... [intervening instructions] ...
+// vreg3 = A2_port vreg99, vreg1, vreg2
+//
+// Both, one, or none of these variants may be valid, and checks are made
+// to rule out inapplicable variants.
+//
+// As an additional optimization, before either of the two steps above is
+// executed, the pass attempts to coalesce the target register with one of
+// the source registers, e.g. given an instruction
+// vreg3 = C2_mux vreg0, vreg1, vreg2
+// vreg3 will be coalesced with either vreg1 or vreg2. If this succeeds,
+// the instruction would then be (for example)
+// vreg3 = C2_mux vreg0, vreg3, vreg2
+// and, under certain circumstances, this could result in only one predicated
+// instruction:
+// vreg3 = A2_tfrf vreg0, vreg2
+//
+
+#define DEBUG_TYPE "expand-condsets"
+#include "HexagonTargetMachine.h"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit",
+ cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions"));
+static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit",
+ cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings"));
+
+namespace llvm {
+ void initializeHexagonExpandCondsetsPass(PassRegistry&);
+ FunctionPass *createHexagonExpandCondsets();
+}
+
+namespace {
+ class HexagonExpandCondsets : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonExpandCondsets() :
+ MachineFunctionPass(ID), HII(0), TRI(0), MRI(0),
+ LIS(0), CoaLimitActive(false),
+ TfrLimitActive(false), CoaCounter(0), TfrCounter(0) {
+ if (OptCoaLimit.getPosition())
+ CoaLimitActive = true, CoaLimit = OptCoaLimit;
+ if (OptTfrLimit.getPosition())
+ TfrLimitActive = true, TfrLimit = OptTfrLimit;
+ initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Hexagon Expand Condsets";
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ const HexagonInstrInfo *HII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+
+ bool CoaLimitActive, TfrLimitActive;
+ unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter;
+
+ struct RegisterRef {
+ RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()),
+ Sub(Op.getSubReg()) {}
+ RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {}
+ bool operator== (RegisterRef RR) const {
+ return Reg == RR.Reg && Sub == RR.Sub;
+ }
+ bool operator!= (RegisterRef RR) const { return !operator==(RR); }
+ unsigned Reg, Sub;
+ };
+
+ typedef DenseMap<unsigned,unsigned> ReferenceMap;
+ enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) };
+ enum { Exec_Then = 0x10, Exec_Else = 0x20 };
+ unsigned getMaskForSub(unsigned Sub);
+ bool isCondset(const MachineInstr *MI);
+
+ void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec);
+ bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec);
+
+ LiveInterval::iterator nextSegment(LiveInterval &LI, SlotIndex S);
+ LiveInterval::iterator prevSegment(LiveInterval &LI, SlotIndex S);
+ void makeDefined(unsigned Reg, SlotIndex S, bool SetDef);
+ void makeUndead(unsigned Reg, SlotIndex S);
+ void shrinkToUses(unsigned Reg, LiveInterval &LI);
+ void updateKillFlags(unsigned Reg, LiveInterval &LI);
+ void terminateSegment(LiveInterval::iterator LT, SlotIndex S,
+ LiveInterval &LI);
+ void addInstrToLiveness(MachineInstr *MI);
+ void removeInstrFromLiveness(MachineInstr *MI);
+
+ unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond);
+ MachineInstr *genTfrFor(MachineOperand &SrcOp, unsigned DstR,
+ unsigned DstSR, const MachineOperand &PredOp, bool Cond);
+ bool split(MachineInstr *MI);
+ bool splitInBlock(MachineBasicBlock &B);
+
+ bool isPredicable(MachineInstr *MI);
+ MachineInstr *getReachingDefForPred(RegisterRef RD,
+ MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond);
+ bool canMoveOver(MachineInstr *MI, ReferenceMap &Defs, ReferenceMap &Uses);
+ bool canMoveMemTo(MachineInstr *MI, MachineInstr *ToI, bool IsDown);
+ void predicateAt(RegisterRef RD, MachineInstr *MI,
+ MachineBasicBlock::iterator Where, unsigned PredR, bool Cond);
+ void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR,
+ bool Cond, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Last);
+ bool predicate(MachineInstr *TfrI, bool Cond);
+ bool predicateInBlock(MachineBasicBlock &B);
+
+ void postprocessUndefImplicitUses(MachineBasicBlock &B);
+ void removeImplicitUses(MachineInstr *MI);
+ void removeImplicitUses(MachineBasicBlock &B);
+
+ bool isIntReg(RegisterRef RR, unsigned &BW);
+ bool isIntraBlocks(LiveInterval &LI);
+ bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
+ bool coalesceSegments(MachineFunction &MF);
+ };
+}
+
+char HexagonExpandCondsets::ID = 0;
+
+
+unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) {
+ switch (Sub) {
+ case Hexagon::subreg_loreg:
+ return Sub_Low;
+ case Hexagon::subreg_hireg:
+ return Sub_High;
+ case Hexagon::NoSubRegister:
+ return Sub_None;
+ }
+ llvm_unreachable("Invalid subregister");
+}
+
+
+bool HexagonExpandCondsets::isCondset(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::C2_mux:
+ case Hexagon::C2_muxii:
+ case Hexagon::C2_muxir:
+ case Hexagon::C2_muxri:
+ case Hexagon::MUX64_rr:
+ return true;
+ break;
+ }
+ return false;
+}
+
+
+void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map,
+ unsigned Exec) {
+ unsigned Mask = getMaskForSub(RR.Sub) | Exec;
+ ReferenceMap::iterator F = Map.find(RR.Reg);
+ if (F == Map.end())
+ Map.insert(std::make_pair(RR.Reg, Mask));
+ else
+ F->second |= Mask;
+}
+
+
+bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map,
+ unsigned Exec) {
+ ReferenceMap::iterator F = Map.find(RR.Reg);
+ if (F == Map.end())
+ return false;
+ unsigned Mask = getMaskForSub(RR.Sub) | Exec;
+ if (Mask & F->second)
+ return true;
+ return false;
+}
+
+
+LiveInterval::iterator HexagonExpandCondsets::nextSegment(LiveInterval &LI,
+ SlotIndex S) {
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (I->start >= S)
+ return I;
+ }
+ return LI.end();
+}
+
+
+LiveInterval::iterator HexagonExpandCondsets::prevSegment(LiveInterval &LI,
+ SlotIndex S) {
+ LiveInterval::iterator P = LI.end();
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (I->end > S)
+ return P;
+ P = I;
+ }
+ return P;
+}
+
+
+/// Find the implicit use of register Reg in slot index S, and make sure
+/// that the "defined" flag is set to SetDef. While the mux expansion is
+/// going on, predicated instructions will have implicit uses of the
+/// registers that are being defined. This is to keep any preceding
+/// definitions live. If there is no preceding definition, the implicit
+/// use will be marked as "undef", otherwise it will be "defined". This
+/// function is used to update the flag.
+void HexagonExpandCondsets::makeDefined(unsigned Reg, SlotIndex S,
+ bool SetDef) {
+ if (!S.isRegister())
+ return;
+ MachineInstr *MI = LIS->getInstructionFromIndex(S);
+ assert(MI && "Expecting instruction");
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg)
+ continue;
+ bool IsDef = !Op.isUndef();
+ if (Op.isImplicit() && IsDef != SetDef)
+ Op.setIsUndef(!SetDef);
+ }
+}
+
+
+void HexagonExpandCondsets::makeUndead(unsigned Reg, SlotIndex S) {
+ // If S is a block boundary, then there can still be a dead def reaching
+ // this point. Instead of traversing the CFG, queue start points of all
+ // live segments that begin with a register, and end at a block boundary.
+ // This may "resurrect" some truly dead definitions, but doing so is
+ // harmless.
+ SmallVector<MachineInstr*,8> Defs;
+ if (S.isBlock()) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (!I->start.isRegister() || !I->end.isBlock())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(I->start);
+ Defs.push_back(MI);
+ }
+ } else if (S.isRegister()) {
+ MachineInstr *MI = LIS->getInstructionFromIndex(S);
+ Defs.push_back(MI);
+ }
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+ MachineInstr *MI = Defs[i];
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ Op.setIsDead(false);
+ }
+ }
+}
+
+
+/// Shrink the segments in the live interval for a given register to the last
+/// use before each subsequent def. Unlike LiveIntervals::shrinkToUses, this
+/// function will not mark any definitions of Reg as dead. The reason for this
+/// is that this function is used while a MUX instruction is being expanded,
+/// or while a conditional copy is undergoing predication. During these
+/// processes, there may be defs present in the instruction sequence that have
+/// not yet been removed, or there may be missing uses that have not yet been
+/// added. We want to utilize LiveIntervals::shrinkToUses as much as possible,
+/// but since it does not extend any intervals that are too short, we need to
+/// pre-emptively extend them here in anticipation of further changes.
+void HexagonExpandCondsets::shrinkToUses(unsigned Reg, LiveInterval &LI) {
+ SmallVector<MachineInstr*,4> Deads;
+ LIS->shrinkToUses(&LI, &Deads);
+ // Need to undo the deadification made by "shrinkToUses". It's easier to
+ // do it here, since we have a list of all instructions that were just
+ // marked as dead.
+ for (unsigned i = 0, n = Deads.size(); i < n; ++i) {
+ MachineInstr *MI = Deads[i];
+ // Clear the "dead" flag.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ Op.setIsDead(false);
+ }
+ // Extend the live segment to the beginning of the next one.
+ LiveInterval::iterator End = LI.end();
+ SlotIndex S = LIS->getInstructionIndex(MI).getRegSlot();
+ LiveInterval::iterator T = LI.FindSegmentContaining(S);
+ assert(T != End);
+ LiveInterval::iterator N = std::next(T);
+ if (N != End)
+ T->end = N->start;
+ else
+ T->end = LIS->getMBBEndIdx(MI->getParent());
+ }
+ updateKillFlags(Reg, LI);
+}
+
+
+/// Given an updated live interval LI for register Reg, update the kill flags
+/// in instructions using Reg to reflect the liveness changes.
+void HexagonExpandCondsets::updateKillFlags(unsigned Reg, LiveInterval &LI) {
+ MRI->clearKillFlags(Reg);
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ SlotIndex EX = I->end;
+ if (!EX.isRegister())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(EX);
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg)
+ continue;
+ // Only set the kill flag on the first encountered use of Reg in this
+ // instruction.
+ Op.setIsKill(true);
+ break;
+ }
+ }
+}
+
+
+/// When adding a new instruction to liveness, the newly added definition
+/// will start a new live segment. This may happen at a position that falls
+/// within an existing live segment. In that case, the live segment needs to
+/// be truncated to make room for the new segment. Ultimately, the truncation
+/// will occur at the last use, but for now the segment can be terminated
+/// right at the place where the new segment will start. The segments will be
+/// shrunk-to-uses later.
+void HexagonExpandCondsets::terminateSegment(LiveInterval::iterator LT,
+ SlotIndex S, LiveInterval &LI) {
+ // Terminate the live segment pointed to by LT within a live interval LI.
+ if (LT == LI.end())
+ return;
+
+ VNInfo *OldVN = LT->valno;
+ SlotIndex EX = LT->end;
+ LT->end = S;
+ // If LT does not end at a block boundary, the termination is done.
+ if (!EX.isBlock())
+ return;
+
+ // If LT ended at a block boundary, it's possible that its value number
+  // is picked up at the beginning of other blocks. Create a new value number
+ // and change such blocks to use it instead.
+ VNInfo *NewVN = 0;
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (!I->start.isBlock() || I->valno != OldVN)
+ continue;
+ // Generate on-demand a new value number that is defined by the
+    // block beginning (i.e. a phi).
+ if (!NewVN)
+ NewVN = LI.getNextValue(I->start, LIS->getVNInfoAllocator());
+ I->valno = NewVN;
+ }
+}
+
+
+/// Add the specified instruction to live intervals. This function is used
+/// to update the live intervals while the program code is being changed.
+/// Neither the expansion of a MUX nor the predication is atomic, and this
+/// function is used to update the live intervals while these transformations
+/// are being done.
+void HexagonExpandCondsets::addInstrToLiveness(MachineInstr *MI) {
+ SlotIndex MX = LIS->isNotInMIMap(MI) ? LIS->InsertMachineInstrInMaps(MI)
+ : LIS->getInstructionIndex(MI);
+ DEBUG(dbgs() << "adding liveness info for instr\n " << MX << " " << *MI);
+
+ MX = MX.getRegSlot();
+ bool Predicated = HII->isPredicated(MI);
+ MachineBasicBlock *MB = MI->getParent();
+
+ // Strip all implicit uses from predicated instructions. They will be
+ // added again, according to the updated information.
+ if (Predicated)
+ removeImplicitUses(MI);
+
+ // For each def in MI we need to insert a new live segment starting at MX
+ // into the interval. If there already exists a live segment in the interval
+ // that contains MX, we need to terminate it at MX.
+ SmallVector<RegisterRef,2> Defs;
+ for (auto &Op : MI->operands())
+ if (Op.isReg() && Op.isDef())
+ Defs.push_back(RegisterRef(Op));
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+ unsigned DefR = Defs[i].Reg;
+ LiveInterval &LID = LIS->getInterval(DefR);
+ DEBUG(dbgs() << "adding def " << PrintReg(DefR, TRI)
+ << " with interval\n " << LID << "\n");
+ // If MX falls inside of an existing live segment, terminate it.
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX);
+ if (LT != LID.end())
+ terminateSegment(LT, MX, LID);
+ DEBUG(dbgs() << "after terminating segment\n " << LID << "\n");
+
+ // Create a new segment starting from MX.
+ LiveInterval::iterator P = prevSegment(LID, MX), N = nextSegment(LID, MX);
+ SlotIndex EX;
+ VNInfo *VN = LID.getNextValue(MX, LIS->getVNInfoAllocator());
+ if (N == LID.end()) {
+ // There is no live segment after MX. End this segment at the end of
+ // the block.
+ EX = LIS->getMBBEndIdx(MB);
+ } else {
+ // If the next segment starts at the block boundary, end the new segment
+ // at the boundary of the preceding block (i.e. the previous index).
+ // Otherwise, end the segment at the beginning of the next segment. In
+ // either case it will be "shrunk-to-uses" later.
+ EX = N->start.isBlock() ? N->start.getPrevIndex() : N->start;
+ }
+ if (Predicated) {
+ // Predicated instruction will have an implicit use of the defined
+ // register. This is necessary so that this definition will not make
+ // any previous definitions dead. If there are no previous live
+ // segments, still add the implicit use, but make it "undef".
+ // Because of the implicit use, the preceding definition is not
+      // dead. Mark it as such (if necessary).
+ MachineOperand ImpUse = MachineOperand::CreateReg(DefR, false, true);
+ ImpUse.setSubReg(Defs[i].Sub);
+ bool Undef = false;
+ if (P == LID.end())
+ Undef = true;
+ else {
+ // If the previous segment extends to the end of the previous block,
+ // the end index may actually be the beginning of this block. If
+ // the previous segment ends at a block boundary, move it back by one,
+ // to get the proper block for it.
+ SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end;
+ MachineBasicBlock *PB = LIS->getMBBFromIndex(PE);
+ if (PB != MB && !LIS->isLiveInToMBB(LID, MB))
+ Undef = true;
+ }
+ if (!Undef) {
+ makeUndead(DefR, P->valno->def);
+ // We are adding a live use, so extend the previous segment to
+ // include it.
+ P->end = MX;
+ } else {
+ ImpUse.setIsUndef(true);
+ }
+
+ if (!MI->readsRegister(DefR))
+ MI->addOperand(ImpUse);
+ if (N != LID.end())
+ makeDefined(DefR, N->start, true);
+ }
+ LiveRange::Segment NR = LiveRange::Segment(MX, EX, VN);
+ LID.addSegment(NR);
+ DEBUG(dbgs() << "added a new segment " << NR << "\n " << LID << "\n");
+ shrinkToUses(DefR, LID);
+ DEBUG(dbgs() << "updated imp-uses: " << *MI);
+ LID.verify();
+ }
+
+ // For each use in MI:
+ // - If there is no live segment that contains MX for the used register,
+ // extend the previous one. Ignore implicit uses.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.isImplicit() || Op.isUndef())
+ continue;
+ unsigned UseR = Op.getReg();
+ LiveInterval &LIU = LIS->getInterval(UseR);
+ // Find the last segment P that starts before MX.
+ LiveInterval::iterator P = LIU.FindSegmentContaining(MX);
+ if (P == LIU.end())
+ P = prevSegment(LIU, MX);
+
+ assert(P != LIU.end() && "MI uses undefined register?");
+ SlotIndex EX = P->end;
+ // If P contains MX, there is not much to do.
+ if (EX > MX) {
+ Op.setIsKill(false);
+ continue;
+ }
+ // Otherwise, extend P to "next(MX)".
+ P->end = MX.getNextIndex();
+ Op.setIsKill(true);
+ // Get the old "kill" instruction, and remove the kill flag.
+ if (MachineInstr *KI = LIS->getInstructionFromIndex(MX))
+ KI->clearRegisterKills(UseR, nullptr);
+ shrinkToUses(UseR, LIU);
+ LIU.verify();
+ }
+}
+
+
+/// Update the live interval information to reflect the removal of the given
+/// instruction from the program. As with "addInstrToLiveness", this function
+/// is called while the program code is being changed.
+void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) {
+ SlotIndex MX = LIS->getInstructionIndex(MI).getRegSlot();
+ DEBUG(dbgs() << "removing instr\n " << MX << " " << *MI);
+
+ // For each def in MI:
+ // If MI starts a live segment, merge this segment with the previous segment.
+ //
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned DefR = Op.getReg();
+ LiveInterval &LID = LIS->getInterval(DefR);
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX);
+ assert(LT != LID.end() && "Expecting live segments");
+ DEBUG(dbgs() << "removing def at " << MX << " of " << PrintReg(DefR, TRI)
+ << " with interval\n " << LID << "\n");
+ if (LT->start != MX)
+ continue;
+
+ VNInfo *MVN = LT->valno;
+ if (LT != LID.begin()) {
+ // If the current live segment is not the first, the task is easy. If
+ // the previous segment continues into the current block, extend it to
+ // the end of the current one, and merge the value numbers.
+ // Otherwise, remove the current segment, and make the end of it "undef".
+ LiveInterval::iterator P = std::prev(LT);
+ SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end;
+ MachineBasicBlock *MB = MI->getParent();
+ MachineBasicBlock *PB = LIS->getMBBFromIndex(PE);
+ if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) {
+ makeDefined(DefR, LT->end, false);
+ LID.removeSegment(*LT);
+ } else {
+ // Make the segments adjacent, so that merge-vn can also merge the
+ // segments.
+ P->end = LT->start;
+ makeUndead(DefR, P->valno->def);
+ LID.MergeValueNumberInto(MVN, P->valno);
+ }
+ } else {
+ LiveInterval::iterator N = std::next(LT);
+ LiveInterval::iterator RmB = LT, RmE = N;
+ while (N != LID.end()) {
+ // Iterate until the first register-based definition is found
+ // (i.e. skip all block-boundary entries).
+ LiveInterval::iterator Next = std::next(N);
+ if (N->start.isRegister()) {
+ makeDefined(DefR, N->start, false);
+ break;
+ }
+ if (N->end.isRegister()) {
+ makeDefined(DefR, N->end, false);
+ RmE = Next;
+ break;
+ }
+ RmE = Next;
+ N = Next;
+ }
+ // Erase the segments in one shot to avoid invalidating iterators.
+ LID.segments.erase(RmB, RmE);
+ }
+
+ bool VNUsed = false;
+ for (LiveInterval::iterator I = LID.begin(), E = LID.end(); I != E; ++I) {
+ if (I->valno != MVN)
+ continue;
+ VNUsed = true;
+ break;
+ }
+ if (!VNUsed)
+ MVN->markUnused();
+
+ DEBUG(dbgs() << "new interval: ");
+ if (!LID.empty()) {
+ DEBUG(dbgs() << LID << "\n");
+ LID.verify();
+ } else {
+ DEBUG(dbgs() << "<empty>\n");
+ LIS->removeInterval(DefR);
+ }
+ }
+
+ // For uses there is nothing to do. The intervals will be updated via
+ // shrinkToUses.
+ SmallVector<unsigned,4> Uses;
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Uses.push_back(R);
+ }
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ for (unsigned i = 0, n = Uses.size(); i < n; ++i) {
+ LiveInterval &LI = LIS->getInterval(Uses[i]);
+ shrinkToUses(Uses[i], LI);
+ }
+}
+
+
+/// Get the opcode for a conditional transfer of the value in SO (source
+/// operand). The condition (true/false) is given in Cond.
+unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
+ bool Cond) {
+ using namespace Hexagon;
+ if (SO.isReg()) {
+ unsigned PhysR;
+ RegisterRef RS = SO;
+ if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) {
+ const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
+ assert(VC->begin() != VC->end() && "Empty register class");
+ PhysR = *VC->begin();
+ } else {
+ assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg));
+ PhysR = RS.Reg;
+ }
+ unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
+ switch (RC->getSize()) {
+ case 4:
+ return Cond ? A2_tfrt : A2_tfrf;
+ case 8:
+ return Cond ? A2_tfrpt : A2_tfrpf;
+ }
+ llvm_unreachable("Invalid register operand");
+ }
+ if (SO.isImm() || SO.isFPImm())
+ return Cond ? C2_cmoveit : C2_cmoveif;
+ llvm_unreachable("Unexpected source operand");
+}
+
+
+/// Generate a conditional transfer, copying the value SrcOp to the
+/// destination register DstR:DstSR, and using the predicate register from
+/// PredOp. The Cond argument specifies whether the predicate is to be
+/// if(PredOp), or if(!PredOp).
+MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp,
+ unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, bool Cond) {
+ MachineInstr *MI = SrcOp.getParent();
+ MachineBasicBlock &B = *MI->getParent();
+ MachineBasicBlock::iterator At = MI;
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Don't avoid identity copies here (i.e. if the source and the destination
+ // are the same registers). It is actually better to generate them here,
+ // since this would cause the copy to potentially be predicated in the next
+ // step. The predication will remove such a copy if it is unable to
+  // predicate.
+
+ unsigned Opc = getCondTfrOpcode(SrcOp, Cond);
+ MachineInstr *TfrI = BuildMI(B, At, DL, HII->get(Opc))
+ .addReg(DstR, RegState::Define, DstSR)
+ .addOperand(PredOp)
+ .addOperand(SrcOp);
+ // We don't want any kills yet.
+ TfrI->clearKillInfo();
+ DEBUG(dbgs() << "created an initial copy: " << *TfrI);
+ return TfrI;
+}
+
+
+/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function
+/// performs all necessary changes to complete the replacement.
+bool HexagonExpandCondsets::split(MachineInstr *MI) {
+ if (TfrLimitActive) {
+ if (TfrCounter >= TfrLimit)
+ return false;
+ TfrCounter++;
+ }
+ DEBUG(dbgs() << "\nsplitting BB#" << MI->getParent()->getNumber()
+ << ": " << *MI);
+ MachineOperand &MD = MI->getOperand(0); // Definition
+ MachineOperand &MP = MI->getOperand(1); // Predicate register
+ assert(MD.isDef());
+ unsigned DR = MD.getReg(), DSR = MD.getSubReg();
+
+  // First, create the two individual conditional transfers, and add each
+ // of them to the live intervals information. Do that first and then remove
+ // the old instruction from live intervals.
+ if (MachineInstr *TfrT = genTfrFor(MI->getOperand(2), DR, DSR, MP, true))
+ addInstrToLiveness(TfrT);
+ if (MachineInstr *TfrF = genTfrFor(MI->getOperand(3), DR, DSR, MP, false))
+ addInstrToLiveness(TfrF);
+ removeInstrFromLiveness(MI);
+
+ return true;
+}
+
+
+/// Split all MUX instructions in the given block into pairs of conditional
+/// transfers.
+bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B) {
+ bool Changed = false;
+ MachineBasicBlock::iterator I, E, NextI;
+ for (I = B.begin(), E = B.end(); I != E; I = NextI) {
+ NextI = std::next(I);
+ if (isCondset(I))
+ Changed |= split(I);
+ }
+ return Changed;
+}
+
+
+bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
+ if (HII->isPredicated(MI) || !HII->isPredicable(MI))
+ return false;
+ if (MI->hasUnmodeledSideEffects() || MI->mayStore())
+ return false;
+ // Reject instructions with multiple defs (e.g. post-increment loads).
+ bool HasDef = false;
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ if (HasDef)
+ return false;
+ HasDef = true;
+ }
+ for (auto &Mo : MI->memoperands())
+ if (Mo->isVolatile())
+ return false;
+ return true;
+}
+
+
+/// Find the reaching definition for a predicated use of RD. The RD is used
+/// under the conditions given by PredR and Cond, and this function will ignore
+/// definitions that set RD under the opposite conditions.
+MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD,
+ MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) {
+ MachineBasicBlock &B = *UseIt->getParent();
+ MachineBasicBlock::iterator I = UseIt, S = B.begin();
+ if (I == S)
+ return 0;
+
+ bool PredValid = true;
+ do {
+ --I;
+ MachineInstr *MI = &*I;
+ // Check if this instruction can be ignored, i.e. if it is predicated
+ // on the complementary condition.
+ if (PredValid && HII->isPredicated(MI)) {
+ if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(MI)))
+ continue;
+ }
+
+ // Check the defs. If the PredR is defined, invalidate it. If RD is
+ // defined, return the instruction or 0, depending on the circumstances.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ RegisterRef RR = Op;
+ if (RR.Reg == PredR) {
+ PredValid = false;
+ continue;
+ }
+ if (RR.Reg != RD.Reg)
+ continue;
+ // If the "Reg" part agrees, there is still the subregister to check.
+ // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but
+ // not vreg1 (w/o subregisters).
+ if (RR.Sub == RD.Sub)
+ return MI;
+ if (RR.Sub == 0 || RD.Sub == 0)
+ return 0;
+ // We have different subregisters, so we can continue looking.
+ }
+ } while (I != S);
+
+ return 0;
+}
+
+
+/// Check if the instruction MI can be safely moved over a set of instructions
+/// whose side-effects (in terms of register defs and uses) are expressed in
+/// the maps Defs and Uses. These maps reflect the conditional defs and uses
+/// that depend on the same predicate register to allow moving instructions
+/// over instructions predicated on the opposite condition.
+bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs,
+ ReferenceMap &Uses) {
+ // In order to be able to safely move MI over instructions that define
+ // "Defs" and use "Uses", no def operand from MI can be defined or used
+ // and no use operand can be defined.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ RegisterRef RR = Op;
+ // For physical register we would need to check register aliases, etc.
+ // and we don't want to bother with that. It would be of little value
+ // before the actual register rewriting (from virtual to physical).
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+ // No redefs for any operand.
+ if (isRefInMap(RR, Defs, Exec_Then))
+ return false;
+ // For defs, there cannot be uses.
+ if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then))
+ return false;
+ }
+ return true;
+}
+
+
+/// Check if the instruction accessing memory (TheI) can be moved to the
+/// location ToI.
+bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI,
+ bool IsDown) {
+ bool IsLoad = TheI->mayLoad(), IsStore = TheI->mayStore();
+ if (!IsLoad && !IsStore)
+ return true;
+ if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI))
+ return true;
+ if (TheI->hasUnmodeledSideEffects())
+ return false;
+
+ MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI;
+ MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI;
+ bool Ordered = TheI->hasOrderedMemoryRef();
+
+ // Search for aliased memory reference in (StartI, EndI).
+ for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) {
+ MachineInstr *MI = &*I;
+ if (MI->hasUnmodeledSideEffects())
+ return false;
+ bool L = MI->mayLoad(), S = MI->mayStore();
+ if (!L && !S)
+ continue;
+ if (Ordered && MI->hasOrderedMemoryRef())
+ return false;
+
+ bool Conflict = (L && IsStore) || S;
+ if (Conflict)
+ return false;
+ }
+ return true;
+}
+
+
+/// Generate a predicated version of MI (where the condition is given via
+/// PredR and Cond) at the point indicated by Where.
+void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI,
+ MachineBasicBlock::iterator Where, unsigned PredR, bool Cond) {
+ // The problem with updating live intervals is that we can move one def
+ // past another def. In particular, this can happen when moving an A2_tfrt
+ // over an A2_tfrf defining the same register. From the point of view of
+ // live intervals, these two instructions are two separate definitions,
+ // and each one starts another live segment. LiveIntervals's "handleMove"
+ // does not allow such moves, so we need to handle it ourselves. To avoid
+ // invalidating liveness data while we are using it, the move will be
+ // implemented in 4 steps: (1) add a clone of the instruction MI at the
+ // target location, (2) update liveness, (3) delete the old instruction,
+ // and (4) update liveness again.
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction.
+ unsigned Opc = MI->getOpcode();
+ unsigned PredOpc = HII->getCondOpcode(Opc, !Cond);
+ MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc));
+ unsigned Ox = 0, NP = MI->getNumOperands();
+ // Skip all defs from MI first.
+ while (Ox < NP) {
+ MachineOperand &MO = MI->getOperand(Ox);
+ if (!MO.isReg() || !MO.isDef())
+ break;
+ Ox++;
+ }
+ // Add the new def, then the predicate register, then the rest of the
+ // operands.
+ MB.addReg(RD.Reg, RegState::Define, RD.Sub);
+ MB.addReg(PredR);
+ while (Ox < NP) {
+ MachineOperand &MO = MI->getOperand(Ox);
+ if (!MO.isReg() || !MO.isImplicit())
+ MB.addOperand(MO);
+ Ox++;
+ }
+
+ MachineFunction &MF = *B.getParent();
+ MachineInstr::mmo_iterator I = MI->memoperands_begin();
+ unsigned NR = std::distance(I, MI->memoperands_end());
+ MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR);
+ for (unsigned i = 0; i < NR; ++i)
+ MemRefs[i] = *I++;
+ MB.setMemRefs(MemRefs, MemRefs+NR);
+
+ MachineInstr *NewI = MB;
+ NewI->clearKillInfo();
+ addInstrToLiveness(NewI);
+}
+
+
+/// In the range [First, Last], rename all references to the "old" register RO
+/// to the "new" register RN, but only in instructions predicated on the given
+/// condition.
+void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN,
+ unsigned PredR, bool Cond, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Last) {
+ MachineBasicBlock::iterator End = std::next(Last);
+ for (MachineBasicBlock::iterator I = First; I != End; ++I) {
+ MachineInstr *MI = &*I;
+ // Do not touch instructions that are not predicated, or are predicated
+ // on the opposite condition.
+ if (!HII->isPredicated(MI))
+ continue;
+ if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(MI)))
+ continue;
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || RO != RegisterRef(Op))
+ continue;
+ Op.setReg(RN.Reg);
+ Op.setSubReg(RN.Sub);
+ // In practice, this isn't supposed to see any defs.
+ assert(!Op.isDef() && "Not expecting a def");
+ }
+ }
+}
+
+
+/// For a given conditional copy, predicate the definition of the source of
+/// the copy under the given condition (using the same predicate register as
+/// the copy).
+bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) {
+ // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi).
+ unsigned Opc = TfrI->getOpcode();
+ (void)Opc;
+ assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf);
+ DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false")
+ << ": " << *TfrI);
+
+ MachineOperand &MD = TfrI->getOperand(0);
+ MachineOperand &MP = TfrI->getOperand(1);
+ MachineOperand &MS = TfrI->getOperand(2);
+ // The source operand should be a <kill>. This is not strictly necessary,
+ // but it makes things a lot simpler. Otherwise, we would need to rename
+ // some registers, which would complicate the transformation considerably.
+ if (!MS.isKill())
+ return false;
+
+ RegisterRef RT(MS);
+ unsigned PredR = MP.getReg();
+ MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond);
+ if (!DefI || !isPredicable(DefI))
+ return false;
+
+ DEBUG(dbgs() << "Source def: " << *DefI);
+
+ // Collect the information about registers defined and used between the
+ // DefI and the TfrI.
+ // Map: reg -> bitmask of subregs
+ ReferenceMap Uses, Defs;
+ MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI;
+
+ // Check if the predicate register is valid between DefI and TfrI.
+ // If it is, we can then ignore instructions predicated on the negated
+ // conditions when collecting def and use information.
+ bool PredValid = true;
+ for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
+ if (!I->modifiesRegister(PredR, 0))
+ continue;
+ PredValid = false;
+ break;
+ }
+
+ for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
+ MachineInstr *MI = &*I;
+ // If this instruction is predicated on the same register, it could
+ // potentially be ignored.
+ // By default assume that the instruction executes on the same condition
+ // as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
+ unsigned Exec = Exec_Then | Exec_Else;
+ if (PredValid && HII->isPredicated(MI) && MI->readsRegister(PredR))
+ Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else;
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ // We don't want to deal with physical registers. The reason is that
+ // they can be aliased with other physical registers. Aliased virtual
+ // registers must share the same register number, and can only differ
+ // in the subregisters, which we are keeping track of. Physical
+      // registers no longer have subregisters---their super- and
+ // subregisters are other physical registers, and we are not checking
+ // that.
+ RegisterRef RR = Op;
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+
+ ReferenceMap &Map = Op.isDef() ? Defs : Uses;
+ addRefToMap(RR, Map, Exec);
+ }
+ }
+
+ // The situation:
+ // RT = DefI
+ // ...
+ // RD = TfrI ..., RT
+
+ // If the register-in-the-middle (RT) is used or redefined between
+  // DefI and TfrI, we may not be able to proceed with this transformation.
+ // We can ignore a def that will not execute together with TfrI, and a
+ // use that will. If there is such a use (that does execute together with
+ // TfrI), we will not be able to move DefI down. If there is a use that
+ // executed if TfrI's condition is false, then RT must be available
+ // unconditionally (cannot be predicated).
+ // Essentially, we need to be able to rename RT to RD in this segment.
+ if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else))
+ return false;
+ RegisterRef RD = MD;
+ // If the predicate register is defined between DefI and TfrI, the only
+ // potential thing to do would be to move the DefI down to TfrI, and then
+ // predicate. The reaching def (DefI) must be movable down to the location
+ // of the TfrI.
+ // If the target register of the TfrI (RD) is not used or defined between
+ // DefI and TfrI, consider moving TfrI up to DefI.
+ bool CanUp = canMoveOver(TfrI, Defs, Uses);
+ bool CanDown = canMoveOver(DefI, Defs, Uses);
+ // The TfrI does not access memory, but DefI could. Check if it's safe
+ // to move DefI down to TfrI.
+ if (DefI->mayLoad() || DefI->mayStore())
+ if (!canMoveMemTo(DefI, TfrI, true))
+ CanDown = false;
+
+ DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
+ << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
+ MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
+ if (CanUp)
+ predicateAt(RD, DefI, PastDefIt, PredR, Cond);
+ else if (CanDown)
+ predicateAt(RD, DefI, TfrIt, PredR, Cond);
+ else
+ return false;
+
+ if (RT != RD)
+ renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt);
+
+ // Delete the user of RT first (it should work either way, but this order
+ // of deleting is more natural).
+ removeInstrFromLiveness(TfrI);
+ removeInstrFromLiveness(DefI);
+ return true;
+}
+
+
+/// Predicate all cases of conditional copies in the specified block.
+bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) {
+ bool Changed = false;
+ MachineBasicBlock::iterator I, E, NextI;
+ for (I = B.begin(), E = B.end(); I != E; I = NextI) {
+ NextI = std::next(I);
+ unsigned Opc = I->getOpcode();
+ if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) {
+ bool Done = predicate(I, (Opc == Hexagon::A2_tfrt));
+ if (!Done) {
+ // If we didn't predicate I, we may need to remove it in case it is
+ // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1.
+ if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2)))
+ removeInstrFromLiveness(I);
+ }
+ Changed |= Done;
+ }
+ }
+ return Changed;
+}
+
+
+void HexagonExpandCondsets::removeImplicitUses(MachineInstr *MI) {
+ for (unsigned i = MI->getNumOperands(); i > 0; --i) {
+ MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && MO.isUse() && MO.isImplicit())
+ MI->RemoveOperand(i-1);
+ }
+}
+
+
+void HexagonExpandCondsets::removeImplicitUses(MachineBasicBlock &B) {
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ if (HII->isPredicated(MI))
+ removeImplicitUses(MI);
+ }
+}
+
+
+void HexagonExpandCondsets::postprocessUndefImplicitUses(MachineBasicBlock &B) {
+ // Implicit uses that are "undef" are only meaningful (outside of the
+ // internals of this pass) when the instruction defines a subregister,
+ // and the implicit-undef use applies to the defined register. In such
+ // cases, the proper way to record the information in the IR is to mark
+ // the definition as "undef", which will be interpreted as "read-undef".
+ typedef SmallSet<unsigned,2> RegisterSet;
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ RegisterSet Undefs;
+ for (unsigned i = MI->getNumOperands(); i > 0; --i) {
+ MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.isUndef()) {
+ MI->RemoveOperand(i-1);
+ Undefs.insert(MO.getReg());
+ }
+ }
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || !Op.getSubReg())
+ continue;
+ if (Undefs.count(Op.getReg()))
+ Op.setIsUndef(true);
+ }
+ }
+}
+
+
+bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+ const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg);
+ if (RC == &Hexagon::IntRegsRegClass) {
+ BW = 32;
+ return true;
+ }
+ if (RC == &Hexagon::DoubleRegsRegClass) {
+ BW = (RR.Sub != 0) ? 32 : 64;
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) {
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ LiveRange::Segment &LR = *I;
+ // Range must start at a register...
+ if (!LR.start.isRegister())
+ return false;
+ // ...and end in a register or in a dead slot.
+ if (!LR.end.isRegister() && !LR.end.isDead())
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
+ if (CoaLimitActive) {
+ if (CoaCounter >= CoaLimit)
+ return false;
+ CoaCounter++;
+ }
+ unsigned BW1, BW2;
+ if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2)
+ return false;
+ if (MRI->isLiveIn(R1.Reg))
+ return false;
+ if (MRI->isLiveIn(R2.Reg))
+ return false;
+
+ LiveInterval &L1 = LIS->getInterval(R1.Reg);
+ LiveInterval &L2 = LIS->getInterval(R2.Reg);
+ bool Overlap = L1.overlaps(L2);
+
+ DEBUG(dbgs() << "compatible registers: ("
+ << (Overlap ? "overlap" : "disjoint") << ")\n "
+ << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n "
+ << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n");
+ if (R1.Sub || R2.Sub)
+ return false;
+ if (Overlap)
+ return false;
+
+ // Coalescing could have a negative impact on scheduling, so try to limit
+  // to some reasonable extent. Only consider coalescing segments when one
+ // of them does not cross basic block boundaries.
+ if (!isIntraBlocks(L1) && !isIntraBlocks(L2))
+ return false;
+
+ MRI->replaceRegWith(R2.Reg, R1.Reg);
+
+ // Move all live segments from L2 to L1.
+ typedef DenseMap<VNInfo*,VNInfo*> ValueInfoMap;
+ ValueInfoMap VM;
+ for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) {
+ VNInfo *NewVN, *OldVN = I->valno;
+ ValueInfoMap::iterator F = VM.find(OldVN);
+ if (F == VM.end()) {
+ NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator());
+ VM.insert(std::make_pair(OldVN, NewVN));
+ } else {
+ NewVN = F->second;
+ }
+ L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN));
+ }
+ while (L2.begin() != L2.end())
+ L2.removeSegment(*L2.begin());
+
+ updateKillFlags(R1.Reg, L1);
+ DEBUG(dbgs() << "coalesced: " << L1 << "\n");
+ L1.verify();
+
+ return true;
+}
+
+
+/// Attempt to coalesce one of the source registers of a MUX instruction with
+/// the destination register. This could lead to having only one predicated
+/// instruction in the end instead of two.
+bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) {
+ SmallVector<MachineInstr*,16> Condsets;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock &B = *I;
+ for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) {
+ MachineInstr *MI = &*J;
+ if (!isCondset(MI))
+ continue;
+ MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3);
+ if (!S1.isReg() && !S2.isReg())
+ continue;
+ Condsets.push_back(MI);
+ }
+ }
+
+ bool Changed = false;
+ for (unsigned i = 0, n = Condsets.size(); i < n; ++i) {
+ MachineInstr *CI = Condsets[i];
+ RegisterRef RD = CI->getOperand(0);
+ RegisterRef RP = CI->getOperand(1);
+ MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3);
+ bool Done = false;
+ // Consider this case:
+ // vreg1 = instr1 ...
+ // vreg2 = instr2 ...
+ // vreg0 = C2_mux ..., vreg1, vreg2
+ // If vreg0 was coalesced with vreg1, we could end up with the following
+ // code:
+ // vreg0 = instr1 ...
+ // vreg2 = instr2 ...
+ // vreg0 = A2_tfrf ..., vreg2
+ // which will later become:
+ // vreg0 = instr1 ...
+ // vreg0 = instr2_cNotPt ...
+ // i.e. there will be an unconditional definition (instr1) of vreg0
+ // followed by a conditional one. The output dependency was there before
+    // and it is unavoidable, but if instr1 is predicable, we will no longer be
+ // able to predicate it here.
+ // To avoid this scenario, don't coalesce the destination register with
+ // a source register that is defined by a predicable instruction.
+ if (S1.isReg()) {
+ RegisterRef RS = S1;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S1));
+ }
+ if (!Done && S2.isReg()) {
+ RegisterRef RS = S2;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S2));
+ }
+ Changed |= Done;
+ }
+ return Changed;
+}
+
+
+bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
+ HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ TRI = MF.getSubtarget().getRegisterInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ MRI = &MF.getRegInfo();
+
+ bool Changed = false;
+
+ // Try to coalesce the target of a mux with one of its sources.
+ // This could eliminate a register copy in some circumstances.
+ Changed |= coalesceSegments(MF);
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ // First, simply split all muxes into a pair of conditional transfers
+ // and update the live intervals to reflect the new arrangement.
+    // This is done mainly to make the live interval update simpler than it
+ // would be while trying to predicate instructions at the same time.
+ Changed |= splitInBlock(*I);
+ // Traverse all blocks and collapse predicable instructions feeding
+ // conditional transfers into predicated instructions.
+ // Walk over all the instructions again, so we may catch pre-existing
+ // cases that were not created in the previous step.
+ Changed |= predicateInBlock(*I);
+ }
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ postprocessUndefImplicitUses(*I);
+ return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon Expand Condsets";
+ PassInfo *PI = new PassInfo(Name, "expand-condsets",
+ &HexagonExpandCondsets::ID, 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonExpandCondsetsPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
+FunctionPass *llvm::createHexagonExpandCondsets() {
+ return new HexagonExpandCondsets();
+}
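
Worth calling out from the new pass (an aside, not part of the patch): the
addRefToMap/isRefInMap helpers record, per virtual register, a small bitmask
saying which sub-half was referenced (Sub_Low/Sub_High/Sub_None) and under
which predicate sense the reference executes (Exec_Then/Exec_Else), and a
query succeeds whenever any bit overlaps. A rough standalone sketch of that
bookkeeping, using std::unordered_map where the pass uses llvm::DenseMap and
hypothetical addRef/hasRef names:

    #include <unordered_map>

    enum : unsigned { Sub_Low = 0x1, Sub_High = 0x2,
                      Sub_None = Sub_Low | Sub_High };
    enum : unsigned { Exec_Then = 0x10, Exec_Else = 0x20 };

    using ReferenceMap = std::unordered_map<unsigned, unsigned>;

    // Record a reference: OR the sub-register mask and execution sense into
    // whatever has already been recorded for this register.
    void addRef(ReferenceMap &Map, unsigned Reg, unsigned SubMask,
                unsigned Exec) {
      Map[Reg] |= SubMask | Exec;
    }

    // Query: is there any recorded reference to Reg that shares a bit with
    // the given sub-register mask or execution sense?
    bool hasRef(const ReferenceMap &Map, unsigned Reg, unsigned SubMask,
                unsigned Exec) {
      auto It = Map.find(Reg);
      return It != Map.end() && ((SubMask | Exec) & It->second) != 0;
    }

This is the shape of the checks canMoveOver relies on when deciding whether
an instruction can be moved over a run of conditionally executed code.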
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index a2209ab..63900e0 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -316,6 +316,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false,
MachinePointerInfo(), MachinePointerInfo());
}
@@ -1716,6 +1717,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUBC, MVT::i32, Expand);
setOperationAction(ISD::SUBC, MVT::i64, Expand);
+ // Only add and sub that detect overflow are the saturating ones.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::UADDO, VT, Expand);
+ setOperationAction(ISD::SADDO, VT, Expand);
+ setOperationAction(ISD::USUBO, VT, Expand);
+ setOperationAction(ISD::SSUBO, VT, Expand);
+ }
+
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
@@ -2106,7 +2115,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// is Big Endian.
unsigned OpIdx = NElts - i - 1;
SDValue Operand = BVN->getOperand(OpIdx);
- if (dyn_cast<ConstantSDNode>(Operand))
+ if (isa<ConstantSDNode>(Operand))
// This operand is already in ConstVal.
continue;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index fbf1ca9..ff4bcad 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -845,8 +845,7 @@ bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
}
-int HexagonInstrInfo::
-getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
+int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
enum Hexagon::PredSense inPredSense;
inPredSense = invertPredicate ? Hexagon::PredSense_false :
Hexagon::PredSense_true;
@@ -884,7 +883,7 @@ PredicateInstruction(MachineInstr *MI,
// This will change MI's opcode to its predicate version.
// However, its operand list is still the old one, i.e. the
// non-predicate one.
- MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
+ MI->setDesc(get(getCondOpcode(Opc, invertJump)));
int oper = -1;
unsigned int GAIdx = 0;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 2644248..284dde1 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -216,9 +216,7 @@ public:
short getNonExtOpcode(const MachineInstr *MI) const;
bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
bool PredOpcodeHasNot(Opcode_t Opcode) const;
-
-private:
- int getMatchingCondBranchOpcode(int Opc, bool sense) const;
+ int getCondOpcode(int Opc, bool sense) const;
};
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 1717ae3..d61cc54 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -72,7 +72,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
const TargetMachine &TM)
- : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()),
+ : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
TSInfo(*TM.getDataLayout()), FrameLowering() {
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 48b0bc8..0679866 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -27,11 +27,15 @@
using namespace llvm;
static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
- cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
+ cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon CFG Optimization"));
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon CFG Optimization"));
+
+static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
+ cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Early expansion of MUX"));
/// HexagonTargetMachineModule - Note that this is used on hosts that
@@ -55,6 +59,10 @@ static MachineSchedRegistry
SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
createVLIWMachineSched);
+namespace llvm {
+ FunctionPass *createHexagonExpandCondsets();
+}
+
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
///
@@ -79,7 +87,15 @@ namespace {
class HexagonPassConfig : public TargetPassConfig {
public:
HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None);
+ if (!NoOpt) {
+ if (EnableExpandCondsets) {
+ Pass *Exp = createHexagonExpandCondsets();
+ insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp));
+ }
+ }
+ }
HexagonTargetMachine &getHexagonTargetMachine() const {
return getTM<HexagonTargetMachine>();
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index bdccf88..155aa9e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -57,7 +57,7 @@ public:
ELFHexagonAsmBackend(Target const &T, uint8_t OSABI)
: HexagonAsmBackend(T), OSABI(OSABI) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
StringRef CPU("HexagonV4");
return createHexagonELFObjectWriter(OS, OSABI, CPU);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index 4a3ac8c..fde935b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -27,8 +27,8 @@ private:
public:
HexagonELFObjectWriter(uint8_t OSABI, StringRef C);
- virtual unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
- bool IsPCRel) const override;
+ unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
+ bool IsPCRel) const override;
};
}
@@ -55,9 +55,9 @@ unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/,
return Type;
}
-MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI,
StringRef CPU) {
MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true);
-} \ No newline at end of file
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 6c87c9f..ec55234 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -78,7 +78,8 @@ StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
}
void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
const char startPacket = '{',
endPacket = '}';
// TODO: add outer HW loop when it's supported too.
@@ -94,7 +95,7 @@ void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O,
Nop.setOpcode (Hexagon::A2_nop);
HexagonMCInstrInfo::setPacketBegin (Nop, HexagonMCInstrInfo::isPacketBegin(*MI));
- printInst (&Nop, O, NoAnnot);
+ printInst (&Nop, O, NoAnnot, STI);
}
// Close the packet.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index d02243b..98fb99b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -25,7 +25,8 @@ namespace llvm {
MCRegisterInfo const &MRI)
: MCInstPrinter(MAI, MII, MRI), MII(MII) {}
- void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
virtual StringRef getOpcodeName(unsigned Opcode) const;
void printInstruction(const MCInst *MI, raw_ostream &O);
StringRef getRegName(unsigned RegNo) const;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index c63bf32..2e10d81 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -75,13 +75,16 @@ static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
return X;
}
-static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
+
+static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- return new HexagonInstPrinter(MAI, MII, MRI);
+ const MCRegisterInfo &MRI) {
+ if (SyntaxVariant == 0)
+ return(new HexagonInstPrinter(MAI, MII, MRI));
+ else
+ return nullptr;
}
// Force static initialization.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 17072d9..de63fd2 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -27,6 +27,7 @@ class MCSubtargetInfo;
class Target;
class StringRef;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheHexagonTarget;
@@ -40,8 +41,8 @@ MCAsmBackend *createHexagonAsmBackend(Target const &T,
MCRegisterInfo const &MRI, StringRef TT,
StringRef CPU);
-MCObjectWriter *createHexagonELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
- StringRef CPU);
+MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI, StringRef CPU);
} // End llvm namespace
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index acf1214..6c43d97 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
#include "MSP430GenAsmWriter.inc"
void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
printInstruction(MI, O);
printAnnotation(O, Annot);
}
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 7fae505..70141a9 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -25,7 +25,8 @@ namespace llvm {
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 4c70803..775c0b2 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -58,12 +58,11 @@ static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
return new MSP430InstPrinter(MAI, MII, MRI);
return nullptr;
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 18141a6..08f41a8 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -645,6 +645,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
Flags.getByValAlign(),
/*isVolatile*/false,
/*AlwaysInline=*/true,
+ /*isTailCall=*/false,
MachinePointerInfo(),
MachinePointerInfo());
} else {
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 68868b6..9266c3b 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -102,12 +102,6 @@ namespace llvm {
const std::string &Constraint,
MVT VT) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
- // FIXME: Map different constraints differently.
- return InlineAsm::Constraint_m;
- }
-
/// isTruncateFree - Return true if it's free to truncate a value of type
/// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in
/// register R15W to i8 by referencing its sub-register R15B.
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index 05352a2..c63a57c 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -62,7 +62,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const {
}
// Create a symbol for the name.
- return Ctx.GetOrCreateSymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name);
}
MCSymbol *MSP430MCInstLower::
@@ -79,7 +79,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
}
// Create a symbol for the name.
- return Ctx.GetOrCreateSymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name);
}
MCSymbol *MSP430MCInstLower::
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 6401bc1..6f7e3c1 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -220,6 +220,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseDirectiveNaN();
bool parseDirectiveSet();
bool parseDirectiveOption();
+ bool parseInsnDirective();
bool parseSetAtDirective();
bool parseSetNoAtDirective();
@@ -272,7 +273,10 @@ class MipsAsmParser : public MCTargetAsmParser {
unsigned getGPR(int RegNo);
- int getATReg(SMLoc Loc);
+ /// Returns the internal register number for the current AT. Also checks if
+ /// the current AT is unavailable (set to $0) and gives an error if it is.
+ /// This should be used in pseudo-instruction expansions which need AT.
+ unsigned getATReg(SMLoc Loc);
bool processInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
@@ -1713,7 +1717,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
// FIXME: gas has a special case for values that are 000...1111, which
// becomes a li -1 and then a dsrl
if (0 <= ImmValue && ImmValue <= 65535) {
- // For 0 <= j <= 65535.
+ // For unsigned and positive signed 16-bit values (0 <= j <= 65535):
// li d,j => ori d,$zero,j
tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
@@ -1721,7 +1725,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else if (ImmValue < 0 && ImmValue >= -32768) {
- // For -32768 <= j < 0.
+ // For negative signed 16-bit values (-32768 <= j < 0):
// li d,j => addiu d,$zero,j
tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
@@ -1729,8 +1733,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else if ((ImmValue & 0xffffffff) == ImmValue) {
- // For any value of j that is representable as a 32-bit integer, create
- // a sequence of:
+ // For all other values which are representable as a 32-bit integer:
// li d,j => lui d,hi16(j)
// ori d,d,lo16(j)
tmpInst.setOpcode(Mips::LUi);
@@ -1752,8 +1755,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
// | 16-bytes | 16-bytes | 16-bytes |
// |__________|__________|__________|
//
- // For any value of j that is representable as a 48-bit integer, create
- // a sequence of:
+ // For any 64-bit value that is representable as a 48-bit integer:
// li d,j => lui d,hi16(j)
// ori d,d,hi16(lo32(j))
// dsll d,d,16
@@ -1778,7 +1780,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
// | 16-bytes | 16-bytes | 16-bytes | 16-bytes |
// |__________|__________|__________|__________|
//
- // For any value of j that isn't representable as a 48-bit integer.
+ // For all other values which are representable as a 64-bit integer:
// li d,j => lui d,hi16(j)
// ori d,d,lo16(hi32(j))
// dsll d,d,16
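
The li expansions above build a wide immediate out of 16-bit chunks with lui/ori/dsll. A minimal standalone C++ sketch of that chunking, mirroring the 48-bit sequence (illustrative only, not code from this patch; it ignores lui's sign-extension and register allocation):

#include <cassert>
#include <cstdint>

// lui hi16(j); ori hi16(lo32(j)); dsll 16; ori lo16(lo32(j))
uint64_t expandLi48(uint64_t J) {
  uint64_t D = ((J >> 32) & 0xffff) << 16; // lui  d, hi16(j)
  D |= (J >> 16) & 0xffff;                 // ori  d, d, hi16(lo32(j))
  D <<= 16;                                // dsll d, d, 16
  D |= J & 0xffff;                         // ori  d, d, lo16(lo32(j))
  return D;
}

int main() {
  assert(expandLi48(0x0000123456789abcULL) == 0x0000123456789abcULL);
  return 0;
}
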
@@ -2048,13 +2050,11 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
if (isLoad && IsGPR && (BaseRegNum != RegOpNum))
TmpRegNum = RegOpNum;
else {
- int AT = getATReg(IDLoc);
// At this point we need AT to perform the expansions and we exit if it is
// not available.
- if (!AT)
+ TmpRegNum = getATReg(IDLoc);
+ if (!TmpRegNum)
return;
- TmpRegNum = getReg(
- (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, AT);
}
TempInst.setOpcode(Mips::LUi);
@@ -2078,12 +2078,14 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
// Prepare TempInst for next instruction.
TempInst.clear();
// Add temp register to base.
- TempInst.setOpcode(Mips::ADDu);
- TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
- TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
- TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
- Instructions.push_back(TempInst);
- TempInst.clear();
+ if (BaseRegNum != Mips::ZERO) {
+ TempInst.setOpcode(Mips::ADDu);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+ }
// And finally, create original instruction with low part
// of offset and new base.
TempInst.setOpcode(Inst.getOpcode());
@@ -2383,11 +2385,15 @@ int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) {
return CC;
}
-int MipsAsmParser::getATReg(SMLoc Loc) {
- int AT = AssemblerOptions.back()->getATRegNum();
- if (AT == 0)
+unsigned MipsAsmParser::getATReg(SMLoc Loc) {
+ unsigned ATIndex = AssemblerOptions.back()->getATRegNum();
+ if (ATIndex == 0) {
reportParseError(Loc,
"pseudo-instruction requires $at, which is not available");
+ return 0;
+ }
+ unsigned AT = getReg(
+ (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, ATIndex);
return AT;
}
@@ -2571,7 +2577,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
if (Tok.isNot(AsmToken::Identifier))
return true;
- std::string Str = Tok.getIdentifier().str();
+ std::string Str = Tok.getIdentifier();
Parser.Lex(); // Eat the identifier.
// Now make an expression from the rest of the operand.
@@ -3579,11 +3585,7 @@ bool MipsAsmParser::parseSetAssignment() {
if (Parser.parseExpression(Value))
return reportParseError("expected valid expression after comma");
- // Check if the Name already exists as a symbol.
- MCSymbol *Sym = getContext().LookupSymbol(Name);
- if (Sym)
- return reportParseError("symbol already defined");
- Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
Sym->setVariableValue(Value);
return false;
@@ -4044,6 +4046,23 @@ bool MipsAsmParser::parseDirectiveOption() {
return false;
}
+/// parseInsnDirective
+/// ::= .insn
+bool MipsAsmParser::parseInsnDirective() {
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ // The actual label marking happens in
+ // MipsELFStreamer::createPendingLabelRelocs().
+ getTargetStreamer().emitDirectiveInsn();
+
+ getParser().Lex(); // Eat EndOfStatement token.
+ return false;
+}
+
/// parseDirectiveModule
/// ::= .module oddspreg
/// ::= .module nooddspreg
@@ -4437,6 +4456,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".llvm_internal_mips_reallow_module_directive")
return parseInternalDirectiveReallowModule();
+ if (IDVal == ".insn")
+ return parseInsnDirective();
+
return true;
}
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index aad549d..e80a47b 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -77,7 +77,7 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
switch (MI->getOpcode()) {
default:
break;
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 468dc07..713f35c 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -86,7 +86,8 @@ public:
static const char *getRegisterName(unsigned RegNo);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index acf6f21..dbcd867 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -155,7 +155,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return Value;
}
-MCObjectWriter *MipsAsmBackend::createObjectWriter(raw_ostream &OS) const {
+MCObjectWriter *
+MipsAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
return createMipsELFObjectWriter(OS,
MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 243b73d..b3d5a49 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -36,7 +36,7 @@ public:
bool Is64Bit)
: MCAsmBackend(), OSType(OSType), IsLittle(IsLittle), Is64Bit(Is64Bit) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override;
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index a68bf16..8d9e3e3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -22,17 +23,33 @@
using namespace llvm;
namespace {
+// A helper structure based on ELFRelocationEntry, used for sorting entries in
+// the relocation table.
+struct MipsRelocationEntry {
+ MipsRelocationEntry(const ELFRelocationEntry &R)
+ : R(R), SortOffset(R.Offset), HasMatchingHi(false) {}
+ const ELFRelocationEntry R;
+ // SortOffset equals R.Offset except for the *HI16 relocations, for which it
+ // will be set based on the R.Offset of the matching *LO16 relocation.
+ int64_t SortOffset;
+ // True when this is a *LO16 relocation chosen as a match for a *HI16
+ // relocation.
+ bool HasMatchingHi;
+};
+
class MipsELFObjectWriter : public MCELFObjectTargetWriter {
public:
MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
bool _isN64, bool IsLittleEndian);
- virtual ~MipsELFObjectWriter();
+ ~MipsELFObjectWriter() override;
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
bool needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const override;
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) override;
};
}
@@ -225,6 +242,169 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
return Type;
}
+// Sort entries by SortOffset in descending order.
+// When there are more *HI16 relocs paired with one *LO16 reloc, the 2nd rule
+// sorts them in ascending order of R.Offset.
+static int cmpRelMips(const MipsRelocationEntry *AP,
+ const MipsRelocationEntry *BP) {
+ const MipsRelocationEntry &A = *AP;
+ const MipsRelocationEntry &B = *BP;
+ if (A.SortOffset != B.SortOffset)
+ return B.SortOffset - A.SortOffset;
+ if (A.R.Offset != B.R.Offset)
+ return A.R.Offset - B.R.Offset;
+ if (B.R.Type != A.R.Type)
+ return B.R.Type - A.R.Type;
+ //llvm_unreachable("ELFRelocs might be unstable!");
+ return 0;
+}
+
+// For the given Reloc.Type, return the matching relocation type, as in the
+// table below.
+static unsigned getMatchingLoType(const MCAssembler &Asm,
+ const ELFRelocationEntry &Reloc) {
+ unsigned Type = Reloc.Type;
+ if (Type == ELF::R_MIPS_HI16)
+ return ELF::R_MIPS_LO16;
+ if (Type == ELF::R_MICROMIPS_HI16)
+ return ELF::R_MICROMIPS_LO16;
+ if (Type == ELF::R_MIPS16_HI16)
+ return ELF::R_MIPS16_LO16;
+
+ const MCSymbolData &SD = Asm.getSymbolData(*Reloc.Symbol);
+
+ if (MCELF::GetBinding(SD) != ELF::STB_LOCAL)
+ return ELF::R_MIPS_NONE;
+
+ if (Type == ELF::R_MIPS_GOT16)
+ return ELF::R_MIPS_LO16;
+ if (Type == ELF::R_MICROMIPS_GOT16)
+ return ELF::R_MICROMIPS_LO16;
+ if (Type == ELF::R_MIPS16_GOT16)
+ return ELF::R_MIPS16_LO16;
+
+ return ELF::R_MIPS_NONE;
+}
+
+// Return true if First needs a matching *LO16, its matching *LO16 type equals
+// Second's type and both relocations are against the same symbol.
+static bool areMatchingHiAndLo(const MCAssembler &Asm,
+ const ELFRelocationEntry &First,
+ const ELFRelocationEntry &Second) {
+ return getMatchingLoType(Asm, First) != ELF::R_MIPS_NONE &&
+ getMatchingLoType(Asm, First) == Second.Type &&
+ First.Symbol && First.Symbol == Second.Symbol;
+}
+
+// Return true if MipsRelocs[Index] is a *LO16 preceded by a matching *HI16.
+static bool
+isPrecededByMatchingHi(const MCAssembler &Asm, uint32_t Index,
+ std::vector<MipsRelocationEntry> &MipsRelocs) {
+ return Index < MipsRelocs.size() - 1 &&
+ areMatchingHiAndLo(Asm, MipsRelocs[Index + 1].R, MipsRelocs[Index].R);
+}
+
+// Return true if MipsRelocs[Index] is a *LO16 not preceded by a matching *HI16
+// and not chosen by a *HI16 as a match.
+static bool isFreeLo(const MCAssembler &Asm, uint32_t Index,
+ std::vector<MipsRelocationEntry> &MipsRelocs) {
+ return Index < MipsRelocs.size() && !MipsRelocs[Index].HasMatchingHi &&
+ !isPrecededByMatchingHi(Asm, Index, MipsRelocs);
+}
+
+// Lo is chosen as a match for Hi, set their fields accordingly.
+// Mips instructions have fixed length of at least two bytes (two for
+// micromips/mips16, four for mips32/64), so we can set HI's SortOffset to
+// matching LO's Offset minus one to simplify the sorting function.
+static void setMatch(MipsRelocationEntry &Hi, MipsRelocationEntry &Lo) {
+ Lo.HasMatchingHi = true;
+ Hi.SortOffset = Lo.R.Offset - 1;
+}
+
+// We sort relocation table entries by offset, except for one additional rule
+// required by MIPS ABI: every *HI16 relocation must be immediately followed by
+// the corresponding *LO16 relocation. We also support a GNU extension that
+// allows more *HI16s paired with one *LO16.
+//
+// *HI16 relocations and their matching *LO16 are:
+//
+// +---------------------------------------------+-------------------+
+// | *HI16 | matching *LO16 |
+// |---------------------------------------------+-------------------|
+// | R_MIPS_HI16, local R_MIPS_GOT16 | R_MIPS_LO16 |
+// | R_MICROMIPS_HI16, local R_MICROMIPS_GOT16 | R_MICROMIPS_LO16 |
+// | R_MIPS16_HI16, local R_MIPS16_GOT16 | R_MIPS16_LO16 |
+// +---------------------------------------------+-------------------+
+//
+// (local R_*_GOT16 meaning R_*_GOT16 against the local symbol.)
+//
+// To handle *HI16 and *LO16 relocations, the linker needs a combined addend
+// ("AHL") calculated from both *HI16 ("AHI") and *LO16 ("ALO") relocations:
+// AHL = (AHI << 16) + (short)ALO;
+//
+// We reuse the GNU assembler's (gas) sorting algorithm, so we emit the
+// relocation table sorted the same way gas would sort it, for easier
+// comparison of the generated .o files.
+//
+// The logic is:
+// search the table (starting from the highest offset and going back to zero)
+// for all *HI16 relocations that don't have a matching *LO16.
+// For every such HI, find a matching LO with highest offset that isn't already
+// matched with another HI. If there are no free LOs, match it with the first
+// found (starting from lowest offset).
+// When there are more HIs matched with one LO, sort them in descending order by
+// offset.
+//
+// In other words, when searching for a matching LO:
+// - don't look for a 'better' match for the HIs that are already followed by a
+// matching LO;
+// - prefer LOs without a pair;
+// - prefer LOs with higher offset;
+void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+ if (Relocs.size() < 2)
+ return;
+
+ // The default function sorts entries by Offset in descending order.
+ MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
+
+ // Init MipsRelocs from Relocs.
+ std::vector<MipsRelocationEntry> MipsRelocs;
+ for (unsigned I = 0, E = Relocs.size(); I != E; ++I)
+ MipsRelocs.push_back(MipsRelocationEntry(Relocs[I]));
+
+ // Find a matching LO for all HIs that need it.
+ for (int32_t I = 0, E = MipsRelocs.size(); I != E; ++I) {
+ if (getMatchingLoType(Asm, MipsRelocs[I].R) == ELF::R_MIPS_NONE ||
+ (I > 0 && isPrecededByMatchingHi(Asm, I - 1, MipsRelocs)))
+ continue;
+
+ int32_t MatchedLoIndex = -1;
+
+ // Search the list in the ascending order of Offset.
+ for (int32_t J = MipsRelocs.size() - 1, N = -1; J != N; --J) {
+ // check for a match
+ if (areMatchingHiAndLo(Asm, MipsRelocs[I].R, MipsRelocs[J].R) &&
+ (MatchedLoIndex == -1 || // first match
+ // or we already have a match,
+ // but this one is with higher offset and it's free
+ (MatchedLoIndex > J && isFreeLo(Asm, J, MipsRelocs))))
+ MatchedLoIndex = J;
+ }
+
+ if (MatchedLoIndex != -1)
+ // We have a match.
+ setMatch(MipsRelocs[I], MipsRelocs[MatchedLoIndex]);
+ }
+
+ // SortOffsets are calculated, call the sorting function.
+ array_pod_sort(MipsRelocs.begin(), MipsRelocs.end(), cmpRelMips);
+
+ // Copy sorted MipsRelocs back to Relocs.
+ for (unsigned I = 0, E = MipsRelocs.size(); I != E; ++I)
+ Relocs[I] = MipsRelocs[I].R;
+}
+
bool
MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const {
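
The pairing rules above exist because the linker reconstructs a combined addend from each HI16/LO16 pair. A standalone C++ sketch of that arithmetic (illustrative only; hi16/lo16 here are local helpers, not patch code), showing why %hi must compensate for the sign-extended low half:

#include <cassert>
#include <cstdint>

// %hi is computed so that (%hi << 16) + (short)%lo reproduces the value,
// i.e. AHL = (AHI << 16) + (short)ALO from the comment above.
uint16_t hi16(uint32_t V) { return (uint16_t)((V - (uint32_t)(int16_t)V) >> 16); }
uint16_t lo16(uint32_t V) { return (uint16_t)V; }

int main() {
  uint32_t Addr = 0x1234ABCD;  // low half is negative when read as a short
  uint32_t AHL = ((uint32_t)hi16(Addr) << 16) + (uint32_t)(int16_t)lo16(Addr);
  assert(AHL == Addr);         // the lui %hi / addiu %lo pair recombines exactly
  return 0;
}
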
@@ -264,7 +444,8 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
}
}
-MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
+MCObjectWriter *llvm::createMipsELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI,
bool IsLittleEndian,
bool Is64Bit) {
MCELFObjectTargetWriter *MOTW =
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index 93f60df..6d1d9f4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -21,8 +21,6 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
MCContext &Context = getContext();
const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo();
- MipsTargetELFStreamer *ELFTargetStreamer =
- static_cast<MipsTargetELFStreamer *>(getTargetStreamer());
for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) {
const MCOperand &Op = Inst.getOperand(OpIndex);
@@ -34,6 +32,14 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
RegInfoRecord->SetPhysRegUsed(Reg, MCRegInfo);
}
+ createPendingLabelRelocs();
+}
+
+void MipsELFStreamer::createPendingLabelRelocs() {
+ MipsTargetELFStreamer *ELFTargetStreamer =
+ static_cast<MipsTargetELFStreamer *>(getTargetStreamer());
+
+ // FIXME: Also mark labels when in MIPS16 mode.
if (ELFTargetStreamer->isMicroMipsEnabled()) {
for (auto Label : Labels) {
MCSymbolData &Data = getOrCreateSymbolData(Label);
@@ -70,7 +76,8 @@ void MipsELFStreamer::EmitMipsOptionRecords() {
}
MCELFStreamer *llvm::createMipsELFStreamer(MCContext &Context,
- MCAsmBackend &MAB, raw_ostream &OS,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
MCCodeEmitter *Emitter,
bool RelaxAll) {
return new MipsELFStreamer(Context, MAB, OS, Emitter);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index 6b834c6..4e30901 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -33,7 +33,7 @@ class MipsELFStreamer : public MCELFStreamer {
public:
- MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
+ MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter)
: MCELFStreamer(Context, MAB, OS, Emitter) {
@@ -65,10 +65,13 @@ public:
/// Emits all the option records stored up until the point it's called.
void EmitMipsOptionRecords();
+
+ /// Mark labels as microMIPS, if necessary for the subtarget.
+ void createPendingLabelRelocs();
};
MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
} // namespace llvm.
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index b01726d..cc40e2e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -43,7 +43,7 @@ public:
MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle)
: MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
- ~MipsMCCodeEmitter() {}
+ ~MipsMCCodeEmitter() override {}
void EmitByte(unsigned char C, raw_ostream &OS) const;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index e6b5be7..687b800 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -23,9 +23,8 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg);
// This function creates an MCELFStreamer for Mips NaCl.
MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- bool RelaxAll);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
}
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 6f3f37b..a75d27d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -97,17 +97,16 @@ static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
+static MCInstPrinter *createMipsMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new MipsInstPrinter(MAI, MII, MRI);
}
static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
- MCAsmBackend &MAB, raw_ostream &OS,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll) {
MCStreamer *S;
if (!T.isOSNaCl())
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 92f394a..577a8b3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -27,6 +27,7 @@ class MCSubtargetInfo;
class StringRef;
class Target;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheMipsTarget;
extern Target TheMipselTarget;
@@ -53,7 +54,7 @@ MCAsmBackend *createMipsAsmBackendEL64(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU);
-MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
+MCObjectWriter *createMipsELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
bool IsLittleEndian, bool Is64Bit);
namespace MIPS_MC {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 1adfdf9..35348aa 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -36,11 +36,11 @@ const unsigned LoadStoreStackMaskReg = Mips::T7;
class MipsNaClELFStreamer : public MipsELFStreamer {
public:
- MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter)
+ MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter)
: MipsELFStreamer(Context, TAB, OS, Emitter), PendingCall(false) {}
- ~MipsNaClELFStreamer() {}
+ ~MipsNaClELFStreamer() override {}
private:
// Whether we started the sandboxing sequence for calls. Calls are bundled
@@ -252,7 +252,7 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg) {
}
MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS,
+ raw_pwrite_stream &OS,
MCCodeEmitter *Emitter,
bool RelaxAll) {
MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 5790a5c..cfd56c6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -54,6 +54,7 @@ void MipsTargetStreamer::emitDirectiveNaN2008() {}
void MipsTargetStreamer::emitDirectiveNaNLegacy() {}
void MipsTargetStreamer::emitDirectiveOptionPic0() {}
void MipsTargetStreamer::emitDirectiveOptionPic2() {}
+void MipsTargetStreamer::emitDirectiveInsn() { forbidModuleDirective(); }
void MipsTargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg) {}
void MipsTargetStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) {}
@@ -189,6 +190,11 @@ void MipsTargetAsmStreamer::emitDirectiveOptionPic2() {
OS << "\t.option\tpic2\n";
}
+void MipsTargetAsmStreamer::emitDirectiveInsn() {
+ MipsTargetStreamer::emitDirectiveInsn();
+ OS << "\t.insn\n";
+}
+
void MipsTargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg) {
OS << "\t.frame\t$"
@@ -507,9 +513,8 @@ void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol,
const MCSymbol &RhsSym =
static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym);
- uint8_t Type = MCELF::GetType(Data);
- if ((Type != ELF::STT_FUNC) ||
- !(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2)))
+
+ if (!(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2)))
return;
MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol);
@@ -637,6 +642,12 @@ void MipsTargetELFStreamer::emitDirectiveOptionPic2() {
MCA.setELFHeaderEFlags(Flags);
}
+void MipsTargetELFStreamer::emitDirectiveInsn() {
+ MipsTargetStreamer::emitDirectiveInsn();
+ MipsELFStreamer &MEF = static_cast<MipsELFStreamer &>(Streamer);
+ MEF.createPendingLabelRelocs();
+}
+
void MipsTargetELFStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg_) {
MCContext &Context = getStreamer().getAssembler().getContext();
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index abecfa0..5828fbd 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -143,25 +143,6 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
-void Mips16FrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
-
- if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
- Amount = -Amount;
-
- const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo *>(STI.getInstrInfo());
-
- TII.adjustStackPtr(Mips::SP, Amount, MBB, I);
- }
-
- MBB.erase(I);
-}
-
bool
Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 012d558..0287e59 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -26,10 +26,6 @@ public:
void emitPrologue(MachineFunction &MF) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
-
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 00d4495..a49572e 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -293,6 +293,9 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted(
void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
+ if (Amount == 0)
+ return;
+
if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16>
BuildAddiuSpImm(MBB, I, Amount);
else
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index f9b7387..6540b40 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -77,7 +77,7 @@ public:
/// Adjust SP by Amount bytes.
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock::iterator I) const override;
/// Emit a series of instructions to load an immediate.
// This is to adjust some FrameReg. We return the new register to be used
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 1eb3b2c..9024f21 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -260,31 +260,22 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize();
bool HasAFGR64Reg = false;
unsigned CSFPRegsSize = 0;
- unsigned i, e = CSI.size();
-
- // Set FPU Bitmask.
- for (i = 0; i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (Mips::GPR32RegClass.contains(Reg))
- break;
+ for (const auto &I : CSI) {
+ unsigned Reg = I.getReg();
unsigned RegNum = TRI->getEncodingValue(Reg);
- if (Mips::AFGR64RegClass.contains(Reg)) {
+
+ // If it's a floating point register, set the FPU Bitmask.
+ // If it's a general purpose register, set the CPU Bitmask.
+ if (Mips::FGR32RegClass.contains(Reg)) {
+ FPUBitmask |= (1 << RegNum);
+ CSFPRegsSize += FGR32RegSize;
+ } else if (Mips::AFGR64RegClass.contains(Reg)) {
FPUBitmask |= (3 << RegNum);
CSFPRegsSize += AFGR64RegSize;
HasAFGR64Reg = true;
- continue;
- }
-
- FPUBitmask |= (1 << RegNum);
- CSFPRegsSize += FGR32RegSize;
- }
-
- // Set CPU Bitmask.
- for (; i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- unsigned RegNum = TRI->getEncodingValue(Reg);
- CPUBitmask |= (1 << RegNum);
+ } else if (Mips::GPR32RegClass.contains(Reg))
+ CPUBitmask |= (1 << RegNum);
}
// FP Regs are saved right below where the virtual frame pointer points to.
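
The rewritten loop above folds the two bitmask passes into one. A tiny standalone sketch of the same bookkeeping over made-up register encodings (illustrative only; Saved, Encoding, and the sample values are assumptions, not LLVM API):

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  struct Saved { unsigned Encoding; bool IsFPR; bool IsDouble; };
  std::vector<Saved> CSI = {{16, false, false}, {17, false, false},
                            {20, true, false},  {22, true, true}};
  uint32_t CPUBitmask = 0, FPUBitmask = 0;
  for (const auto &R : CSI) {
    if (R.IsFPR)
      FPUBitmask |= (R.IsDouble ? 3u : 1u) << R.Encoding; // a 64-bit FPR covers a pair
    else
      CPUBitmask |= 1u << R.Encoding;                      // one bit per GPR
  }
  std::printf("CPU=0x%08x FPU=0x%08x\n", CPUBitmask, FPUBitmask);
  return 0;
}
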
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 7de0081..e8e3d3d 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -440,7 +440,7 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) {
const GlobalValue *GV = dyn_cast<GlobalValue>(V);
- if (GV && isa<Function>(GV) && dyn_cast<Function>(GV)->isIntrinsic())
+ if (GV && isa<Function>(GV) && cast<Function>(GV)->isIntrinsic())
return false;
if (!GV)
return false;
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 8b8b019..826fbaf 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -131,3 +131,20 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
return RoundUpToAlignment(Offset, getStackAlignment());
}
+
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
+void MipsFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ unsigned SP = STI.getABI().IsN64() ? Mips::SP_64 : Mips::SP;
+
+ if (!hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ STI.getInstrInfo()->adjustStackPtr(SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
+}
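
The hoisted eliminateCallFramePseudoInstr applies a simple sign convention before adjusting SP. A standalone sketch of just that convention (illustrative only; adjustFor and the sample amounts are assumptions):

#include <cassert>
#include <cstdint>

int64_t adjustFor(bool IsAdjCallStackDown, int64_t Amount, int64_t SP) {
  if (IsAdjCallStackDown)
    Amount = -Amount;   // ADJCALLSTACKDOWN reserves space, so SP moves down
  return SP + Amount;   // the matching ADJCALLSTACKUP releases it again
}

int main() {
  int64_t SP = 0x7fff0000;
  SP = adjustFor(true, 32, SP);   // on ADJCALLSTACKDOWN 32
  SP = adjustFor(false, 32, SP);  // on ADJCALLSTACKUP 32
  assert(SP == 0x7fff0000);
  return 0;
}
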
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 90a8d2a..96d1e29 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -32,6 +32,11 @@ public:
bool hasFP(const MachineFunction &MF) const override;
+ void
+ eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
+
protected:
uint64_t estimateStackSize(const MachineFunction &MF) const;
};
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index e4bae03..f37737d 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -3682,6 +3682,7 @@ void MipsTargetLowering::passByValArg(
DAG.getIntPtrConstant(VA.getLocMemOffset()));
Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy),
Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false,
MachinePointerInfo(), MachinePointerInfo());
MemOpChains.push_back(Chain);
}
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 7b2b289..4589535 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -117,6 +117,10 @@ public:
const TargetRegisterInfo *TRI,
int64_t Offset) const = 0;
+ virtual void adjustStackPtr(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const = 0;
+
/// Create an instruction which has the same operands and memory operands
/// as MI but has a new opcode.
MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc,
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index c937d2b..a1fad66 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -186,10 +186,8 @@ def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">,
AssemblerPredicate<"FeatureMips16">;
def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
AssemblerPredicate<"FeatureCnMips">;
-def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">,
- AssemblerPredicate<"FeatureMips32">;
-def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
- AssemblerPredicate<"FeatureMips32">;
+def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def HasStdEnc : Predicate<"Subtarget->hasStandardEncoding()">,
AssemblerPredicate<"!FeatureMips16">;
@@ -1596,8 +1594,12 @@ def : MipsInstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>;
}
def : MipsInstAlias<"bnez $rs,$offset",
(BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
+def : MipsInstAlias<"bnezl $rs,$offset",
+ (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
def : MipsInstAlias<"beqz $rs,$offset",
(BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
+def : MipsInstAlias<"beqzl $rs,$offset",
+ (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
def : MipsInstAlias<"syscall", (SYSCALL 0), 1>;
def : MipsInstAlias<"break", (BREAK 0, 0), 1>;
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 09e722d..0d1ee04 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -60,15 +60,7 @@ void MipsCallEntry::printCustom(raw_ostream &O) const {
#endif
}
-MipsFunctionInfo::~MipsFunctionInfo() {
- for (StringMap<const MipsCallEntry *>::iterator
- I = ExternalCallEntries.begin(), E = ExternalCallEntries.end(); I != E;
- ++I)
- delete I->getValue();
-
- for (const auto &Entry : GlobalCallEntries)
- delete Entry.second;
-}
+MipsFunctionInfo::~MipsFunctionInfo() {}
bool MipsFunctionInfo::globalBaseRegSet() const {
return GlobalBaseReg;
@@ -125,21 +117,21 @@ bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
}
MachinePointerInfo MipsFunctionInfo::callPtrInfo(StringRef Name) {
- const MipsCallEntry *&E = ExternalCallEntries[Name];
+ std::unique_ptr<const MipsCallEntry> &E = ExternalCallEntries[Name];
if (!E)
- E = new MipsCallEntry(Name);
+ E = llvm::make_unique<MipsCallEntry>(Name);
- return MachinePointerInfo(E);
+ return MachinePointerInfo(E.get());
}
MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) {
- const MipsCallEntry *&E = GlobalCallEntries[Val];
+ std::unique_ptr<const MipsCallEntry> &E = GlobalCallEntries[Val];
if (!E)
- E = new MipsCallEntry(Val);
+ E = llvm::make_unique<MipsCallEntry>(Val);
- return MachinePointerInfo(E);
+ return MachinePointerInfo(E.get());
}
int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 217f307..32436ef 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -144,8 +144,9 @@ private:
int MoveF64ViaSpillFI;
/// MipsCallEntry maps.
- StringMap<const MipsCallEntry *> ExternalCallEntries;
- ValueMap<const GlobalValue *, const MipsCallEntry *> GlobalCallEntries;
+ StringMap<std::unique_ptr<const MipsCallEntry>> ExternalCallEntries;
+ ValueMap<const GlobalValue *, std::unique_ptr<const MipsCallEntry>>
+ GlobalCallEntries;
};
} // end of namespace llvm
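
The switch to unique_ptr above removes the manual destructor by letting the maps own the call entries. A standard-library-only sketch of the same lazy-create-and-own pattern (illustrative only; CallEntry, getOrCreate, and std::map stand in for MipsCallEntry and LLVM's StringMap/ValueMap):

#include <cassert>
#include <map>
#include <memory>
#include <string>

struct CallEntry {
  explicit CallEntry(std::string N) : Name(std::move(N)) {}
  std::string Name;
};

const CallEntry *getOrCreate(std::map<std::string, std::unique_ptr<CallEntry>> &M,
                             const std::string &Name) {
  std::unique_ptr<CallEntry> &E = M[Name];
  if (!E)
    E = std::make_unique<CallEntry>(Name); // owned by the map; no manual delete
  return E.get();
}

int main() {
  std::map<std::string, std::unique_ptr<CallEntry>> Entries;
  const CallEntry *A = getOrCreate(Entries, "memcpy");
  assert(A == getOrCreate(Entries, "memcpy")); // second lookup reuses the entry
  return 0;
}
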
diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h
index dc29cbd..746feab 100644
--- a/lib/Target/Mips/MipsOptionRecord.h
+++ b/lib/Target/Mips/MipsOptionRecord.h
@@ -52,7 +52,7 @@ public:
COP2RegClass = &(TRI->getRegClass(Mips::COP2RegClassID));
COP3RegClass = &(TRI->getRegClass(Mips::COP3RegClassID));
}
- ~MipsRegInfoRecord() {}
+ ~MipsRegInfoRecord() override {}
void EmitMipsOptionRecord() override;
void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo);
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 7c79c4c..23feb5c 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -607,26 +607,6 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
!MFI->hasVarSizedObjects();
}
-// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
-void MipsSEFrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo());
-
- if (!hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
-
- if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
- Amount = -Amount;
-
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- TII.adjustStackPtr(SP, Amount, MBB, I);
- }
-
- MBB.erase(I);
-}
-
void MipsSEFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 0eca1df..22448a4 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -27,10 +27,6 @@ public:
void emitPrologue(MachineFunction &MF) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
-
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index a598c3f..6daa632 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -948,7 +948,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
llvm_unreachable("Unexpected asm memory constraint");
// All memory constraints can at least accept raw pointers.
case InlineAsm::Constraint_i:
- case InlineAsm::Constraint_R:
OutOps.push_back(Op);
OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
return false;
@@ -961,6 +960,20 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
OutOps.push_back(Op);
OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
return false;
+ case InlineAsm::Constraint_R:
+ // The 'R' constraint is supposed to be much more complicated than this.
+ // However, it's becoming less useful due to architectural changes and
+ // ought to be replaced by other constraints such as 'ZC'.
+ // For now, support 9-bit signed offsets which is supportable by all
+ // subtargets for all instructions.
+ if (selectAddrRegImm9(Op, Base, Offset)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ OutOps.push_back(Op);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ return false;
case InlineAsm::Constraint_ZC:
// ZC matches whatever the pref, ll, and sc instructions can handle for the
// given subtarget.
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index b992579..cb38393 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -364,6 +364,9 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+ if (Amount == 0)
+ return;
+
if (isInt<16>(Amount))// addi sp, sp, amount
BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
else { // Expand immediate that doesn't fit in 16-bit.
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index d16fab2..bebbabf 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -68,7 +68,7 @@ public:
/// Adjust SP by Amount bytes.
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock::iterator I) const override;
/// Emit a series of instructions to load an immediate. If NewImm is a
/// non-NULL parameter, the last instruction is not emitted, but instead
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index 1ff041d..22b0c6c 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -45,6 +45,7 @@ public:
virtual void emitDirectiveNaNLegacy();
virtual void emitDirectiveOptionPic0();
virtual void emitDirectiveOptionPic2();
+ virtual void emitDirectiveInsn();
virtual void emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg);
virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff);
@@ -160,6 +161,7 @@ public:
void emitDirectiveNaNLegacy() override;
void emitDirectiveOptionPic0() override;
void emitDirectiveOptionPic2() override;
+ void emitDirectiveInsn() override;
void emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg) override;
void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override;
@@ -227,6 +229,7 @@ public:
void emitDirectiveNaNLegacy() override;
void emitDirectiveOptionPic0() override;
void emitDirectiveOptionPic2() override;
+ void emitDirectiveInsn() override;
void emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg) override;
void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override;
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index 80b2f62..ac92df9 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -28,13 +28,9 @@ using namespace llvm;
#include "NVPTXGenAsmWriter.inc"
-
NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : MCInstPrinter(MAI, MII, MRI) {
- setAvailableFeatures(STI.getFeatureBits());
-}
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
// Decode the virtual register
@@ -72,7 +68,7 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void NVPTXInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
printInstruction(MI, OS);
// Next always print the annotation.
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 0496964..02c5a21 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -25,10 +25,11 @@ class MCSubtargetInfo;
class NVPTXInstPrinter : public MCInstPrinter {
public:
NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 2b4d864..f9e4324 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -58,14 +58,13 @@ static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
return X;
}
-static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T,
+static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new NVPTXInstPrinter(MAI, MII, MRI, STI);
+ return new NVPTXInstPrinter(MAI, MII, MRI);
return nullptr;
}
diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td
index 93fabf6..96abfa8 100644
--- a/lib/Target/NVPTX/NVPTX.td
+++ b/lib/Target/NVPTX/NVPTX.td
@@ -32,20 +32,28 @@ def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21",
"Target SM 2.1">;
def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30",
"Target SM 3.0">;
+def SM32 : SubtargetFeature<"sm_32", "SmVersion", "32",
+ "Target SM 3.2">;
def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35",
"Target SM 3.5">;
+def SM37 : SubtargetFeature<"sm_37", "SmVersion", "37",
+ "Target SM 3.7">;
def SM50 : SubtargetFeature<"sm_50", "SmVersion", "50",
"Target SM 5.0">;
+def SM52 : SubtargetFeature<"sm_52", "SmVersion", "52",
+ "Target SM 5.2">;
+def SM53 : SubtargetFeature<"sm_53", "SmVersion", "53",
+ "Target SM 5.3">;
// PTX Versions
-def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30",
- "Use PTX version 3.0">;
-def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
- "Use PTX version 3.1">;
def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
"Use PTX version 3.2">;
def PTX40 : SubtargetFeature<"ptx40", "PTXVersion", "40",
"Use PTX version 4.0">;
+def PTX41 : SubtargetFeature<"ptx41", "PTXVersion", "41",
+ "Use PTX version 4.1">;
+def PTX42 : SubtargetFeature<"ptx42", "PTXVersion", "42",
+ "Use PTX version 4.2">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -57,8 +65,12 @@ class Proc<string Name, list<SubtargetFeature> Features>
def : Proc<"sm_20", [SM20]>;
def : Proc<"sm_21", [SM21]>;
def : Proc<"sm_30", [SM30]>;
+def : Proc<"sm_32", [SM32, PTX40]>;
def : Proc<"sm_35", [SM35]>;
-def : Proc<"sm_50", [SM50]>;
+def : Proc<"sm_37", [SM37, PTX41]>;
+def : Proc<"sm_50", [SM50, PTX40]>;
+def : Proc<"sm_52", [SM52, PTX41]>;
+def : Proc<"sm_53", [SM53, PTX42]>;
def NVPTXInstrInfo : InstrInfo {
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index cc58b07..9a71964 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -118,7 +118,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
DebugLoc curLoc = MI.getDebugLoc();
- if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
+ if (!prevDebugLoc && !curLoc)
return;
if (prevDebugLoc == curLoc)
@@ -126,39 +126,32 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
prevDebugLoc = curLoc;
- if (curLoc.isUnknown())
+ if (!curLoc)
return;
- const MachineFunction *MF = MI.getParent()->getParent();
- //const TargetMachine &TM = MF->getTarget();
-
- const LLVMContext &ctx = MF->getFunction()->getContext();
- DIScope Scope(curLoc.getScope(ctx));
-
- assert((!Scope || Scope.isScope()) &&
- "Scope of a DebugLoc should be null or a DIScope.");
+ auto *Scope = cast_or_null<MDScope>(curLoc.getScope());
if (!Scope)
return;
- StringRef fileName(Scope.getFilename());
- StringRef dirName(Scope.getDirectory());
+ StringRef fileName(Scope->getFilename());
+ StringRef dirName(Scope->getDirectory());
SmallString<128> FullPathName = dirName;
if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
sys::path::append(FullPathName, fileName);
- fileName = FullPathName.str();
+ fileName = FullPathName;
}
- if (filenameMap.find(fileName.str()) == filenameMap.end())
+ if (filenameMap.find(fileName) == filenameMap.end())
return;
// Emit the line from the source file.
if (InterleaveSrc)
- this->emitSrcInText(fileName.str(), curLoc.getLine());
+ this->emitSrcInText(fileName, curLoc.getLine());
std::stringstream temp;
- temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
+ temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine()
<< " " << curLoc.getCol();
- OutStreamer.EmitRawText(Twine(temp.str().c_str()));
+ OutStreamer.EmitRawText(temp.str());
}
void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -641,7 +634,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
return false;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (GV->getName().str() == "llvm.used")
+ if (GV->getName() == "llvm.used")
return false;
return true;
}
@@ -656,7 +649,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
- if (othergv->getName().str() == "llvm.used")
+ if (othergv->getName() == "llvm.used")
return true;
}
@@ -780,32 +773,32 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
DbgFinder.processModule(M);
unsigned i = 1;
- for (DICompileUnit DIUnit : DbgFinder.compile_units()) {
- StringRef Filename(DIUnit.getFilename());
- StringRef Dirname(DIUnit.getDirectory());
+ for (const MDCompileUnit *DIUnit : DbgFinder.compile_units()) {
+ StringRef Filename = DIUnit->getFilename();
+ StringRef Dirname = DIUnit->getDirectory();
SmallString<128> FullPathName = Dirname;
if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
sys::path::append(FullPathName, Filename);
- Filename = FullPathName.str();
+ Filename = FullPathName;
}
- if (filenameMap.find(Filename.str()) != filenameMap.end())
+ if (filenameMap.find(Filename) != filenameMap.end())
continue;
- filenameMap[Filename.str()] = i;
- OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
+ filenameMap[Filename] = i;
+ OutStreamer.EmitDwarfFileDirective(i, "", Filename);
++i;
}
- for (DISubprogram SP : DbgFinder.subprograms()) {
- StringRef Filename(SP.getFilename());
- StringRef Dirname(SP.getDirectory());
+ for (MDSubprogram *SP : DbgFinder.subprograms()) {
+ StringRef Filename = SP->getFilename();
+ StringRef Dirname = SP->getDirectory();
SmallString<128> FullPathName = Dirname;
if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
sys::path::append(FullPathName, Filename);
- Filename = FullPathName.str();
+ Filename = FullPathName;
}
- if (filenameMap.find(Filename.str()) != filenameMap.end())
+ if (filenameMap.find(Filename) != filenameMap.end())
continue;
- filenameMap[Filename.str()] = i;
+ filenameMap[Filename] = i;
++i;
}
}
@@ -1011,7 +1004,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
msg.append("Error: ");
msg.append("Symbol ");
if (V->hasName())
- msg.append(V->getName().str());
+ msg.append(V->getName());
msg.append("has unsupported appending linkage type");
llvm_unreachable(msg.c_str());
} else if (!V->hasInternalLinkage() &&
@@ -1147,7 +1140,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
const Function *demotedFunc = nullptr;
if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
- O << "// " << GVar->getName().str() << " has been demoted\n";
+ O << "// " << GVar->getName() << " has been demoted\n";
if (localDecls.find(demotedFunc) != localDecls.end())
localDecls[demotedFunc].push_back(GVar);
else {
@@ -1195,9 +1188,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// The frontend adds zero-initializer to variables that don't have an
// initial value, so skip warning for this case.
if (!GVar->getInitializer()->isNullValue()) {
- std::string warnMsg = "initial value of '" + GVar->getName().str() +
- "' is not allowed in addrspace(" +
- llvm::utostr_32(PTy->getAddressSpace()) + ")";
+ std::string warnMsg =
+ ("initial value of '" + GVar->getName() +
+ "' is not allowed in addrspace(" +
+ Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str();
report_fatal_error(warnMsg.c_str());
}
}
@@ -1771,12 +1765,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::IntegerTyID: {
const Type *ETy = CPV->getType();
if (ETy == Type::getInt8Ty(CPV->getContext())) {
- unsigned char c =
- (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
ptr = &c;
aggBuffer->addBytes(ptr, 1, Bytes);
} else if (ETy == Type::getInt16Ty(CPV->getContext())) {
- short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
ptr = (unsigned char *)&int16;
aggBuffer->addBytes(ptr, 2, Bytes);
} else if (ETy == Type::getInt32Ty(CPV->getContext())) {
@@ -2086,7 +2079,7 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
std::stringstream temp;
- LineReader *reader = this->getReader(filename.str());
+ LineReader *reader = this->getReader(filename);
temp << "\n//";
temp << filename.str();
temp << ":";
@@ -2094,7 +2087,7 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
temp << " ";
temp << reader->readLine(line);
temp << "\n";
- this->OutStreamer.EmitRawText(Twine(temp.str()));
+ this->OutStreamer.EmitRawText(temp.str());
}
LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index 6d7c99c..ae63cae 100644
--- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -132,9 +132,8 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
} else {
// GEP is a constant expression.
Constant *NewGEPCE = ConstantExpr::getGetElementPtr(
- cast<Constant>(Cast->getOperand(0)),
- Indices,
- GEP->isInBounds());
+ GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
+ Indices, GEP->isInBounds());
GEP->replaceAllUsesWith(
ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType()));
}
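
This reflects the updated GEP-creation API in this LLVM drop: ConstantExpr::getGetElementPtr now takes the source element type as an explicit leading argument rather than deriving it from the pointer operand, and the same explicit-element-type pattern shows up in the CreateInBoundsGEP and CreateGEP call sites changed in NVPTXGenericToNVVM.cpp and NVPTXLowerAggrCopies.cpp below.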
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 850c020..6fd09c4 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -347,6 +347,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
NewOperands[0],
makeArrayRef(&NewOperands[1], NumOperands - 1))
: Builder.CreateInBoundsGEP(
+ cast<GEPOperator>(C)->getSourceElementType(),
NewOperands[0],
makeArrayRef(&NewOperands[1], NumOperands - 1));
case Instruction::Select:
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index ff74e6e..8b06657 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3893,7 +3893,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
const SDNode *left = N0.getOperand(0).getNode();
const SDNode *right = N0.getOperand(1).getNode();
- if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right))
+ if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
opIsLive = true;
if (!opIsLive)
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 578401a..6ab0fad 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -70,8 +70,8 @@ static void convertTransferToLoop(
// srcAddr and dstAddr are expected to be pointer types,
// so no check is made here.
- unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
- unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace();
+ unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointers to (char *)
srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
@@ -84,9 +84,11 @@ static void convertTransferToLoop(
ind->addIncoming(ConstantInt::get(indType, 0), origBB);
// load from srcAddr+ind
- Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile);
+ Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind),
+ srcVolatile);
// store at dstAddr+ind
- loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile);
+ loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind),
+ dstVolatile);
// The value for ind coming from backedge is (ind + 1)
Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
@@ -106,7 +108,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
origBB->getTerminator()->setSuccessor(0, loopBB);
IRBuilder<> builder(origBB, origBB->getTerminator());
- unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointer to the type of value getting stored
dstAddr =
@@ -116,7 +118,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
PHINode *ind = loop.CreatePHI(len->getType(), 0);
ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
- loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false);
+ loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false);
Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
ind->addIncoming(newind, loopBB);
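
The two helpers patched above rewrite aggregate copies and memsets into explicit loops. As a rough, purely illustrative C++ rendering of the control flow the pass builds in IR (the real code emits basic blocks and PHI nodes, not C):

#include <cstddef>

// Shape of the loop produced by convertTransferToLoop: one byte per iteration,
// with the same induction variable indexing both source and destination.
void transferAsLoop(unsigned char *dst, const unsigned char *src, size_t len) {
  for (size_t ind = 0; ind < len; ++ind)
    dst[ind] = src[ind]; // load from src+ind, store to dst+ind
}

// Shape of the loop produced by convertMemSetToLoop.
void memsetAsLoop(unsigned char *dst, unsigned char val, size_t len) {
  for (size_t ind = 0; ind < len; ++ind)
    dst[ind] = val;
}

The only functional change in this hunk is that the CreateGEP calls now name their element type explicitly (getInt8Ty for the copy, val->getType() for the memset); the loop structure itself is unchanged.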
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index b8df5af..2cd10e8 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -52,7 +52,7 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
// Emission of machine code through MCJIT is not supported.
- bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
+ bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_pwrite_stream &,
bool = true) override {
return true;
}
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index b8af04d..dc81802 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "NVPTXTargetTransformInfo.h"
+#include "NVPTXUtilities.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -19,6 +20,75 @@ using namespace llvm;
#define DEBUG_TYPE "NVPTXtti"
+// Whether the given intrinsic reads threadIdx.x/y/z.
+static bool readsThreadIndex(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::nvvm_read_ptx_sreg_tid_x:
+ case Intrinsic::nvvm_read_ptx_sreg_tid_y:
+ case Intrinsic::nvvm_read_ptx_sreg_tid_z:
+ return true;
+ }
+}
+
+static bool readsLaneId(const IntrinsicInst *II) {
+ return II->getIntrinsicID() == Intrinsic::ptx_read_laneid;
+}
+
+// Whether the given intrinsic is an atomic instruction in PTX.
+static bool isNVVMAtomic(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::nvvm_atomic_load_add_f32:
+ case Intrinsic::nvvm_atomic_load_inc_32:
+ case Intrinsic::nvvm_atomic_load_dec_32:
+ return true;
+ }
+}
+
+bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
+ // Without inter-procedural analysis, we conservatively assume that arguments
+ // to __device__ functions are divergent.
+ if (const Argument *Arg = dyn_cast<Argument>(V))
+ return !isKernelFunction(*Arg->getParent());
+
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Without pointer analysis, we conservatively assume values loaded from
+ // generic or local address space are divergent.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ unsigned AS = LI->getPointerAddressSpace();
+ return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
+ }
+ // Atomic instructions may cause divergence. Atomic instructions are
+ // executed sequentially across all threads in a warp. Therefore, an earlier
+ // executed thread may see different memory inputs than a later executed
+ // thread. For example, suppose *a = 0 initially.
+ //
+ // atom.global.add.s32 d, [a], 1
+ //
+ // returns 0 for the first thread that enters the critical region, and 1 for
+ // the second thread.
+ if (I->isAtomic())
+ return true;
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ // Instructions that read threadIdx are obviously divergent.
+ if (readsThreadIndex(II) || readsLaneId(II))
+ return true;
+      // Handle the NVPTX atomic intrinsics that cannot be represented as an
+ // atomic IR instruction.
+ if (isNVVMAtomic(II))
+ return true;
+ }
+ // Conservatively consider the return value of function calls as divergent.
+ // We could analyze callees with bodies more precisely using
+ // inter-procedural analysis.
+ if (isa<CallInst>(I))
+ return true;
+ }
+
+ return false;
+}
+
unsigned NVPTXTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
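
The new isSourceOfDivergence hook is just a small conservative decision table. A self-contained sketch of the same rules, using a toy value description instead of the LLVM IR classes the real code inspects:

#include <cstdio>

// Toy stand-in for llvm::Value; each flag corresponds to one rule above.
struct ToyValue {
  bool isArgOfNonKernelFunction;  // argument of a non-kernel (__device__) function
  bool isLoadFromGenericOrLocal;  // load from the generic or local address space
  bool isAtomic;                  // atomic instruction or NVVM atomic intrinsic
  bool readsTidOrLaneId;          // reads threadIdx.* or the lane id
  bool isCallResult;              // return value of a call
};

static bool isSourceOfDivergence(const ToyValue &V) {
  return V.isArgOfNonKernelFunction || V.isLoadFromGenericOrLocal ||
         V.isAtomic || V.readsTidOrLaneId || V.isCallResult;
}

int main() {
  ToyValue tid{false, false, false, true, false}; // e.g. reading threadIdx.x
  std::printf("divergent: %d\n", isSourceOfDivergence(tid)); // prints 1
}

Values that match none of these rules are simply not divergence sources; they can still become divergent through data or control dependence in the analysis that consumes this hook.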
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index bf21e88..4280888 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -61,6 +61,8 @@ public:
bool hasBranchDivergence() { return true; }
+ bool isSourceOfDivergence(const Value *V);
+
unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 99a1633..90ab7a5 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1071,6 +1071,58 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
+ case PPC::RLWINMbm:
+ case PPC::RLWINMobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::CreateImm(MB));
+ TmpInst.addOperand(MCOperand::CreateImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWIMIbm:
+ case PPC::RLWIMIobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0)); // The tied operand.
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::CreateImm(MB));
+ TmpInst.addOperand(MCOperand::CreateImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWNMbm:
+ case PPC::RLWNMobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::CreateImm(MB));
+ TmpInst.addOperand(MCOperand::CreateImm(ME));
+ Inst = TmpInst;
+ break;
+ }
}
}
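
These pseudo forms let the assembler accept a full 32-bit mask in place of the usual MB/ME pair. As an illustration (register numbers and shift amount chosen arbitrarily): for a mask operand of 0x0000FFF0, a single run of ones, isRunOfOnes returns MB = 16 and ME = 27, so

    rlwinm 3, 4, 5, 0x0000FFF0

is rewritten to the canonical

    rlwinm 3, 4, 5, 16, 27

A mask that is not one contiguous (possibly wrapping) run of ones fails the isRunOfOnes check and the pseudo is left unexpanded.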
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index a9f5fc7..5cbf3d9 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -25,7 +25,7 @@ class PPCDisassembler : public MCDisassembler {
public:
PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- virtual ~PPCDisassembler() {}
+ ~PPCDisassembler() override {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 311a4f2..1576544 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -51,7 +51,7 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 8718743..eca37eb 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -32,7 +32,8 @@ public:
}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index bea88a2..420c5c8 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -208,7 +208,7 @@ namespace {
public:
DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCMachObjectWriter(
OS,
@@ -224,8 +224,7 @@ namespace {
ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) :
PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index b817394..3e3489f 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -412,7 +412,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
}
}
-MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createPPCELFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit,
bool IsLittleEndian,
uint8_t OSABI) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index b9f0afb..725b47b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -44,7 +44,7 @@ public:
: MCII(mcii), CTX(ctx),
IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
- ~PPCMCCodeEmitter() {}
+ ~PPCMCCodeEmitter() override {}
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 2f7a768..423e427 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -238,14 +238,12 @@ createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
return new PPCTargetMachOStreamer(S);
}
-static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin();
- return new PPCInstPrinter(MAI, MII, MRI, isDarwin);
+ const MCRegisterInfo &MRI) {
+ return new PPCInstPrinter(MAI, MII, MRI, T.isOSDarwin());
}
extern "C" void LLVMInitializePowerPCTargetMC() {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 8b1e3b4..5f2117c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -18,6 +18,7 @@
#undef PPC
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
namespace llvm {
class MCAsmBackend;
@@ -29,6 +30,7 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
+class raw_pwrite_stream;
class raw_ostream;
extern Target ThePPC32Target;
@@ -42,15 +44,42 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-/// createPPCELFObjectWriter - Construct an PPC ELF object writer.
-MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- bool IsLittleEndian,
- uint8_t OSABI);
-/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer.
-MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+/// Construct an PPC ELF object writer.
+MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+ bool IsLittleEndian, uint8_t OSABI);
+/// Construct a PPC Mach-O object writer.
+MCObjectWriter *createPPCMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint32_t CPUType,
uint32_t CPUSubtype);
+
+/// Returns true iff Val consists of one contiguous run of 1s with any number of
+/// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
+/// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
+/// since all 1s are not contiguous.
+static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (!Val)
+ return false;
+
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = countLeadingZeros(Val);
+ // look for the first zero bit after the run of ones
+ ME = countLeadingZeros((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = countLeadingZeros(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
} // End llvm namespace
// Generated files will use "namespace PPC". To avoid symbol clash,
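
The helper moved into this header is pure bit manipulation, so it can be exercised in isolation. A standalone sketch with the MathExtras calls replaced by compiler builtins (illustrative only; the real code keeps isShiftedMask_32 and countLeadingZeros):

#include <cstdint>
#include <cstdio>

static unsigned clz32(uint32_t v) { return v ? __builtin_clz(v) : 32; }

// A "shifted mask" is a single contiguous run of ones with no wrap-around.
static bool isShiftedMask32(uint32_t v) {
  uint32_t m = (v - 1) | v;           // fill trailing zeros with ones
  return v && (m & (m + 1)) == 0;     // result must be of the form 0...01...1
}

static bool isRunOfOnes(uint32_t Val, unsigned &MB, unsigned &ME) {
  if (!Val)
    return false;
  if (isShiftedMask32(Val)) {
    MB = clz32(Val);                  // first one bit from the MSB side
    ME = clz32((Val - 1) ^ Val);      // last one bit of the run
    return true;
  }
  uint32_t Inv = ~Val;                // wrapping run: the zeros form a run
  if (isShiftedMask32(Inv)) {
    ME = clz32(Inv) - 1;
    MB = clz32((Inv - 1) ^ Inv) + 1;
    return true;
  }
  return false;                       // no run present
}

int main() {
  unsigned MB, ME;
  if (isRunOfOnes(0x0000FFF0u, MB, ME))
    std::printf("MB=%u ME=%u\n", MB, ME); // prints MB=16 ME=27
}

For 0x0000FFF0 this reproduces the MB = 16, ME = 27 pair used in the rlwinm alias expansion added to PPCAsmParser.cpp above.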
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index f7259b9..44e69b7 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -378,8 +378,8 @@ void PPCMachObjectWriter::RecordPPCRelocation(
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
- uint32_t CPUType,
+MCObjectWriter *llvm::createPPCMachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
return createMachObjectWriter(
new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS,
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index f175f6d..1a02bcc 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -86,6 +86,10 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
"Enable the popcnt[dw] instructions">;
+def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true",
+ "Enable the bpermd instruction">;
+def FeatureExtDiv : SubtargetFeature<"extdiv", "HasExtDiv", "true",
+ "Enable extended divide instructions">;
def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
"Enable the ldbrx instruction">;
def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true",
@@ -118,6 +122,10 @@ def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
"Enable POWER8 vector instructions",
[FeatureVSX, FeatureP8Altivec]>;
+def FeatureDirectMove :
+ SubtargetFeature<"direct-move", "HasDirectMove", "true",
+ "Enable Power8 direct move instructions",
+ [FeatureVSX]>;
def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
"HasPartwordAtomics", "true",
"Enable l[bh]arx and st[bh]cx.">;
@@ -133,6 +141,38 @@ def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
"Treat vector data stream cache control instructions as deprecated">;
+/* Since new processors generally contain a superset of features of those that
+ came before them, the idea is to make implementations of new processors
+ less error prone and easier to read.
+ Namely:
+ list<SubtargetFeature> Power8FeatureList = ...
+ list<SubtargetFeature> FutureProcessorSpecificFeatureList =
+ [ features that Power8 does not support ]
+ list<SubtargetFeature> FutureProcessorFeatureList =
+ !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)
+
+ Makes it explicit and obvious what is new in FutureProcessor vs. Power8 as
+ well as providing a single point of definition if the feature set will be
+ used elsewhere.
+*/
+def ProcessorFeatures {
+ list<SubtargetFeature> Power7FeatureList =
+ [DirectivePwr7, FeatureAltivec, FeatureVSX,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureBPERMD, FeatureExtDiv,
+ DeprecatedMFTB, DeprecatedDST];
+ list<SubtargetFeature> Power8SpecificFeatures =
+ [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
+ FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
+ list<SubtargetFeature> Power8FeatureList =
+ !listconcat(Power7FeatureList, Power8SpecificFeatures);
+}
+
// Note: Future features to add when support is extended to more
// recent ISA levels:
//
@@ -243,33 +283,6 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE]>;
-/* Since new processors generally contain a superset of features of those that
- came before them, the idea is to make implementations of new processors
- less error prone and easier to read.
- Namely:
- list<SubtargetFeature> Power8FeatureList = ...
- list<SubtargetFeature> FutureProcessorSpecificFeatureList =
- [ features that Power8 does not support ]
- list<SubtargetFeature> FutureProcessorFeatureList =
- !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)
-
- Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as
- well as providing a single point of definition if the feature set will be
- used elsewhere.
-
-*/
-def ProcessorFeatures {
- list<SubtargetFeature> Power8FeatureList =
- [DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX,
- FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt,
- FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto,
- Feature64Bit /*, Feature64BitRegs */, FeatureICBT,
- FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST];
-}
-
def : ProcessorModel<"970", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt,
@@ -339,15 +352,7 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit,
DeprecatedMFTB, DeprecatedDST]>;
-def : ProcessorModel<"pwr7", P7Model,
- [DirectivePwr7, FeatureAltivec, FeatureVSX,
- FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
- FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic,
- DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : ProcessorModel<"ppc64", G5Model,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index cd60906..383a1e2 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1105,25 +1105,6 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
}
}
- MachineModuleInfoELF &MMIELF =
- MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .long _foo
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
- OutContext),
- isPPC64 ? 8 : 4/*size*/);
- }
-
- Stubs.clear();
- OutStreamer.AddBlankLine();
- }
-
return AsmPrinter::doFinalization(M);
}
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index fbd7b6d..002616b 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -958,6 +958,8 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
}
// Attempt to fast-select an integer-to-floating-point conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
MVT DstVT;
Type *DstTy = I->getType();
@@ -1065,6 +1067,8 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
}
// Attempt to fast-select a floating-point-to-integer conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
MVT DstVT, SrcVT;
Type *DstTy = I->getType();
@@ -1444,6 +1448,9 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
RetVT != MVT::i8)
return false;
+ else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
+ // We can't handle boolean returns when CR bits are in use.
+ return false;
// FIXME: No multi-register return values yet.
if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 3ac8e94..4f8d01b 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -105,13 +105,6 @@ namespace {
return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy());
}
- /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
- /// with any number of 0s on either side. The 1s are allowed to wrap from
- /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
- /// 0x0F0F0000 is not, since all 1s are not contiguous.
- static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
-
-
/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
/// rotate and mask opcode and mask operation.
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
@@ -418,30 +411,6 @@ SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
getSmallIPtrImm(Offset));
}
-bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
- if (!Val)
- return false;
-
- if (isShiftedMask_32(Val)) {
- // look for the first non-zero bit
- MB = countLeadingZeros(Val);
- // look for the first zero bit after the run of ones
- ME = countLeadingZeros((Val - 1) ^ Val);
- return true;
- } else {
- Val = ~Val; // invert mask
- if (isShiftedMask_32(Val)) {
- // effectively look for the first zero bit
- ME = countLeadingZeros(Val) - 1;
- // effectively look for the first one bit after the run of zeros
- MB = countLeadingZeros((Val - 1) ^ Val) + 1;
- return true;
- }
- }
- // no run present
- return false;
-}
-
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
bool isShiftMask, unsigned &SH,
unsigned &MB, unsigned &ME) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 871531e..4c0b6a6 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -996,6 +996,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
+ case PPCISD::MFVSR: return "PPCISD::MFVSR";
+ case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
+ case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
@@ -1287,22 +1290,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
-/// isAllNegativeZeroVector - Returns true if all elements of build_vector
-/// are -0.0.
-bool PPC::isAllNegativeZeroVector(SDNode *N) {
- BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
-
- APInt APVal, APUndef;
- unsigned BitSize;
- bool HasAnyUndefs;
-
- if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
- return CFP->getValueAPF().isNegZero();
-
- return false;
-}
-
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
@@ -2234,7 +2221,7 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
// 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
return DAG.getMemcpy(Op.getOperand(0), Op,
Op.getOperand(1), Op.getOperand(2),
- DAG.getConstant(12, MVT::i32), 8, false, true,
+ DAG.getConstant(12, MVT::i32), 8, false, true, false,
MachinePointerInfo(), MachinePointerInfo());
}
@@ -3821,7 +3808,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
SDLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- false, false, MachinePointerInfo(),
+ false, false, false, MachinePointerInfo(),
MachinePointerInfo());
}
@@ -5927,8 +5914,46 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
RLI.MPI = MPI;
}
+/// \brief Custom lowers floating point to integer conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(
+ Op.getOpcode() == ISD::FP_TO_SINT
+ ? PPCISD::FCTIWZ
+ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
+ dl, MVT::f64, Src);
+ Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
+ break;
+ case MVT::i64:
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
+ Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
+ break;
+ }
+ return Tmp;
+}
+
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
SDLoc dl) const {
+ if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
+ return LowerFP_TO_INTDirectMove(Op, DAG, dl);
+
ReuseLoadInfo RLI;
LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
@@ -6006,6 +6031,38 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
+/// \brief Custom lowers integer to floating point conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert((Op.getValueType() == MVT::f32 ||
+ Op.getValueType() == MVT::f64) &&
+ "Invalid floating point type as target of conversion");
+ assert(Subtarget.hasFPCVT() &&
+ "Int to FP conversions with direct moves require FPCVT");
+ SDValue FP;
+ SDValue Src = Op.getOperand(0);
+ bool SinglePrec = Op.getValueType() == MVT::f32;
+ bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
+ bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
+ unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
+ (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
+
+ if (WordInt) {
+ FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
+ dl, MVT::f64, Src);
+ FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+ }
+ else {
+ FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
+ FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+ }
+
+ return FP;
+}
+
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -6041,6 +6098,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
DAG.getConstantFP(1.0, Op.getValueType()),
DAG.getConstantFP(0.0, Op.getValueType()));
+  // If we have direct moves, we can do all the conversions and skip the store/load;
+ // however, without FPCVT we can't do most conversions.
+ if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
+ return LowerINT_TO_FPDirectMove(Op, DAG, dl);
+
assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
@@ -6609,7 +6671,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
unsigned SplatBitSize;
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, 0, true) || SplatBitSize > 32)
+ HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
+ SplatBitSize > 32)
return SDValue();
unsigned SplatBits = APSplatBits.getZExtValue();
@@ -6676,22 +6739,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
- // The remaining cases assume either big endian element order or
- // a splat-size that equates to the element size of the vector
- // to be built. An example that doesn't work for little endian is
- // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
- // and a vector element size of 16 bits. The code below will
- // produce the vector in big endian element order, which for little
- // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
-
- // For now, just avoid these optimizations in that case.
- // FIXME: Develop correct optimizations for LE with mismatched
- // splat and element sizes.
-
- if (Subtarget.isLittleEndian() &&
- SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
- return SDValue();
-
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
@@ -7733,6 +7780,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
// LowerFP_TO_INT() can only handle f32 and f64.
if (N->getOperand(0).getValueType() == MVT::ppcf128)
return;
@@ -11023,21 +11071,23 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- const Function *F = MF.getFunction();
- // When expanding a memset, require at least two QPX instructions to cover
- // the cost of loading the value to be stored from the constant pool.
- if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
- (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
- !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
- return MVT::v4f64;
- }
-
- // We should use Altivec/VSX loads and stores when available. For unaligned
- // addresses, unaligned VSX loads are only fast starting with the P8.
- if (Subtarget.hasAltivec() && Size >= 16 &&
- (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
- ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
- return MVT::v4i32;
+ if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
+ const Function *F = MF.getFunction();
+ // When expanding a memset, require at least two QPX instructions to cover
+ // the cost of loading the value to be stored from the constant pool.
+ if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+ (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ return MVT::v4f64;
+ }
+
+ // We should use Altivec/VSX loads and stores when available. For unaligned
+ // addresses, unaligned VSX loads are only fast starting with the P8.
+ if (Subtarget.hasAltivec() && Size >= 16 &&
+ (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+ ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+ return MVT::v4i32;
+ }
if (Subtarget.isPPC64()) {
return MVT::i64;
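
The practical effect is that the QPX v4f64 and Altivec/VSX v4i32 memory-op types are now only suggested when optimizing; at -O0 the function falls through to the plain integer types (i64 on PPC64), so memcpy/memset expansion no longer uses vector registers in unoptimized builds.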
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 8afd7ef..7e2ebd4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -119,6 +119,15 @@ namespace llvm {
/// resultant GPR. Bits corresponding to other CR regs are undefined.
MFOCRF,
+ /// Direct move from a VSX register to a GPR
+ MFVSR,
+
+ /// Direct move from a GPR to a VSX register (algebraic)
+ MTVSRA,
+
+ /// Direct move from a GPR to a VSX register (zero)
+ MTVSRZ,
+
// FIXME: Remove these once the ANDI glue bug is fixed:
/// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
/// eq or gt bit of CR0 after executing andi. x, 1. This is used to
@@ -368,10 +377,6 @@ namespace llvm {
/// VSPLTB/VSPLTH/VSPLTW.
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
- /// isAllNegativeZeroVector - Returns true if all elements of build_vector
- /// are -0.0.
- bool isAllNegativeZeroVector(SDNode *N);
-
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
@@ -649,6 +654,10 @@ namespace llvm {
void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
SelectionDAG &DAG, SDLoc dl) const;
+ SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const;
+ SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const;
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 183d088..d1d67cb 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -603,6 +603,10 @@ defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
"popcntd $rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctpop i64:$rS))]>;
+def BPERMD : XForm_6<31, 252, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "bpermd $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_bpermd g8rc:$rS, g8rc:$rB))]>,
+ isPPC64, Requires<[HasBPERMD]>;
let isCodeGenOnly = 1, isCommutable = 1 in
def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
@@ -616,14 +620,30 @@ def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
"popcntw $rA, $rS", IIC_IntGeneral,
[(set i32:$rA, (ctpop i32:$rS))]>;
-defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divd", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divdu", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divd", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64;
+defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdu", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64;
+def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde $rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde. $rT, $rA, $rB", IIC_IntDivD,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ isPPC64, Requires<[HasExtDiv]>;
+def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu $rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu. $rT, $rA, $rB", IIC_IntDivD,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ isPPC64, Requires<[HasExtDiv]>;
let isCommutable = 1 in
defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"mulld", "$rT, $rA, $rB", IIC_IntMulHD,
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index b7a7a1f..43c2158 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -764,6 +764,12 @@ class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = XT{5};
}
+class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let B = 0;
+}
+
class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 5eff156..8aecb65 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -726,6 +726,8 @@ def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
+def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">;
+def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -802,6 +804,23 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
}
}
+// Multiclass for instructions for which the non-record form is not cracked
+// and the record form is cracked (e.g. divw, mullw, etc.)
+multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel, PPC970_DGroup_First,
+ PPC970_DGroup_Cracked;
+ }
+}
+
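+// For DIVW, for example, this expands to a plain DIVW definition plus a DIVWo
+// record form ("divw.") that defines CR0 and carries the PPC970 first/cracked
+// dispatch-group annotations, whereas the previous XOForm_1r-based defm placed
+// those annotations on the non-record form as well.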
multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -2300,14 +2319,30 @@ defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
[(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
PPC970_DGroup_Cracked;
-defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divw", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divwu", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divw", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>;
+defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwu", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>;
+def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe. $rT, $rA, $rB", IIC_IntDivW,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ Requires<[HasExtDiv]>;
+def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu. $rT, $rA, $rB", IIC_IntDivW,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ Requires<[HasExtDiv]>;
let isCommutable = 1 in {
defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mulhw", "$rT, $rA, $rB", IIC_IntMulHW,
@@ -3726,6 +3761,19 @@ def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)
def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+
// These generic branch instruction forms are used for the assembler parser only.
// Defs and Uses are conservative, since we don't know the BO value.
let PPC970_Unit = 7 in {
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index ec04da4..a98e58f 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -41,6 +41,9 @@ def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
+def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
+def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
+def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
@@ -946,6 +949,7 @@ def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
when the elements are larger than i32.
*/
def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
+def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
let Predicates = [HasP8Vector] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
let isCommutable = 1 in {
@@ -965,3 +969,24 @@ def XXLORC : XX3Form<60, 170,
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
} // AddedComplexity = 500
} // HasP8Vector
+
+let Predicates = [HasDirectMove, HasVSX] in {
+// VSX direct move instructions
+def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
+ "mfvsrd $rA, $XT", IIC_VecGeneral,
+ [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
+ Requires<[In64BitMode]>;
+def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
+ "mfvsrwz $rA, $XT", IIC_VecGeneral,
+ [(set i32:$rA, (PPCmfvsr f64:$XT))]>;
+def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
+ "mtvsrd $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
+ Requires<[In64BitMode]>;
+def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwa $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
+def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwz $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
+} // HasDirectMove, HasVSX
diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
index 005bcaf..2947c66 100644
--- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
+++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "ppc-loop-data-prefetch"
#include "PPC.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
@@ -110,11 +111,9 @@ bool PPCLoopDataPrefetch::runOnFunction(Function &F) {
bool MadeChange = false;
- for (LoopInfo::iterator I = LI->begin(), E = LI->end();
- I != E; ++I) {
- Loop *L = *I;
- MadeChange |= runOnLoop(L);
- }
+ for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+ for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
return MadeChange;
}
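
Both this pass and PPCLoopPreIncPrep below switch from visiting only the top-level loops of LoopInfo to a depth-first walk of every loop in each nest. A toy sketch of the traversal order (a stand-in Loop type instead of llvm::Loop and df_iterator):

#include <cstdio>
#include <string>
#include <vector>

struct ToyLoop {
  std::string name;
  std::vector<ToyLoop *> subLoops;
};

// Pre-order walk: visit the outer loop, then recurse into its sub-loops,
// which is the order df_begin/df_end produce for a loop nest.
static void runOnNest(ToyLoop *L) {
  std::printf("runOnLoop(%s)\n", L->name.c_str());
  for (ToyLoop *Sub : L->subLoops)
    runOnNest(Sub);
}

int main() {
  ToyLoop inner{"inner", {}};
  ToyLoop outer{"outer", {&inner}};
  std::vector<ToyLoop *> topLevel{&outer};  // what LI->begin()..end() exposes
  for (ToyLoop *L : topLevel)
    runOnNest(L);  // prints runOnLoop(outer) then runOnLoop(inner)
}

df_iterator visits a loop before its sub-loops, so runOnLoop now sees the outer loop first and then every nested loop, where previously only loops at the top level of the nest were passed to runOnLoop.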
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 092a4ef..b6e7799 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -22,6 +22,7 @@
#define DEBUG_TYPE "ppc-loop-preinc-prep"
#include "PPC.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -143,11 +144,9 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
bool MadeChange = false;
- for (LoopInfo::iterator I = LI->begin(), E = LI->end();
- I != E; ++I) {
- Loop *L = *I;
- MadeChange |= runOnLoop(L);
- }
+ for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+ for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
return MadeChange;
}
@@ -159,16 +158,15 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (!L->empty())
return MadeChange;
+ DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
+
BasicBlock *Header = L->getHeader();
const PPCSubtarget *ST =
TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr;
- unsigned HeaderLoopPredCount = 0;
- for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
- PI != PE; ++PI) {
- ++HeaderLoopPredCount;
- }
+ unsigned HeaderLoopPredCount =
+ std::distance(pred_begin(Header), pred_end(Header));
// Collect buckets of comparable addresses used by loads and stores.
typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket;
@@ -205,9 +203,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (L->isLoopInvariant(PtrValue))
continue;
- const SCEV *LSCEV = SE->getSCEV(PtrValue);
- if (!isa<SCEVAddRecExpr>(LSCEV))
+ const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
+ if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) {
+ if (LARSCEV->getLoop() != L)
+ continue;
+ } else {
continue;
+ }
bool FoundBucket = false;
for (unsigned i = 0, e = Buckets.size(); i != e; ++i)
@@ -236,11 +238,16 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
// returns a value (which might contribute to determining the loop's
// iteration space), insert a new preheader for the loop.
if (!LoopPredecessor ||
- !LoopPredecessor->getTerminator()->getType()->isVoidTy())
+ !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
LoopPredecessor = InsertPreheaderForLoop(L, this);
+ if (LoopPredecessor)
+ MadeChange = true;
+ }
if (!LoopPredecessor)
return MadeChange;
+ DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n");
+
SmallSet<BasicBlock *, 16> BBChanged;
for (unsigned i = 0, e = Buckets.size(); i != e; ++i) {
// The base address of each bucket is transformed into a phi and the others
@@ -251,6 +258,10 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (!BasePtrSCEV->isAffine())
continue;
+ DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
+ assert(BasePtrSCEV->getLoop() == L &&
+ "AddRec for the wrong loop?");
+
Instruction *MemI = Buckets[i].begin()->second;
Value *BasePtr = GetPointerOperand(MemI);
assert(BasePtr && "No pointer operand");
@@ -271,6 +282,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (!isSafeToExpand(BasePtrStartSCEV, *SE))
continue;
+ DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
+
PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
MemI->hasName() ? MemI->getName() + ".phi" : "",
Header->getFirstNonPHI());
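The bucketing code above also changes how a candidate pointer is screened: its SCEV is now evaluated at the scope of the current loop, and the access is kept only when that SCEV is an add recurrence over the same loop. A sketch of the guard as a standalone predicate (the helper name is illustrative; the pass performs the check inline):

  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  #include "llvm/IR/Value.h"

  using namespace llvm;

  // A pointer only qualifies for a bucket when its SCEV, at the scope of L,
  // is an add recurrence over L itself (not over some enclosing loop).
  static bool isAddRecOverLoop(ScalarEvolution &SE, Value *PtrValue, Loop *L) {
    const SCEV *LSCEV = SE.getSCEVAtScope(PtrValue, L);
    const auto *AR = dyn_cast<SCEVAddRecExpr>(LSCEV);
    return AR && AR->getLoop() == L;
  }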
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 0965cb3..6df89fe 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -66,7 +66,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
unsigned OrigLen = Name.size() - PrefixLen;
Name += Suffix;
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
// If the target flags on the operand changes the name of the symbol, do that
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index ed88803..f313b0a 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -21,7 +21,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
@@ -83,6 +82,8 @@ void PPCSubtarget::initializeEnvironment() {
HasFPCVT = false;
HasISEL = false;
HasPOPCNTD = false;
+ HasBPERMD = false;
+ HasExtDiv = false;
HasCMPB = false;
HasLDBRX = false;
IsBookE = false;
@@ -96,6 +97,7 @@ void PPCSubtarget::initializeEnvironment() {
HasICBT = false;
HasInvariantFunctionDescriptors = false;
HasPartwordAtomics = false;
+ HasDirectMove = false;
IsQPXStackUnaligned = false;
HasHTM = false;
}
@@ -110,11 +112,6 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
else
CPUName = "generic";
}
-#if (defined(__APPLE__) || defined(__linux__)) && \
- (defined(__ppc__) || defined(__powerpc__))
- if (CPUName == "generic")
- CPUName = sys::getHostCPUName();
-#endif
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index b4c1bb1..8d95508 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -101,6 +101,8 @@ protected:
bool HasFPCVT;
bool HasISEL;
bool HasPOPCNTD;
+ bool HasBPERMD;
+ bool HasExtDiv;
bool HasCMPB;
bool HasLDBRX;
bool IsBookE;
@@ -115,6 +117,7 @@ protected:
bool HasICBT;
bool HasInvariantFunctionDescriptors;
bool HasPartwordAtomics;
+ bool HasDirectMove;
bool HasHTM;
/// When targeting QPX running a stock PPC64 Linux kernel where the stack
@@ -225,6 +228,8 @@ public:
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasBPERMD() const { return HasBPERMD; }
+ bool hasExtDiv() const { return HasExtDiv; }
bool hasCMPB() const { return HasCMPB; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
@@ -239,6 +244,7 @@ public:
return HasInvariantFunctionDescriptors;
}
bool hasPartwordAtomics() const { return HasPartwordAtomics; }
+ bool hasDirectMove() const { return HasDirectMove; }
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
unsigned getPlatformStackAlignment() const {
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index 6493713..8aaf5e1 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -16,7 +16,7 @@ namespace llvm {
class PPCTargetStreamer : public MCTargetStreamer {
public:
PPCTargetStreamer(MCStreamer &S);
- virtual ~PPCTargetStreamer();
+ ~PPCTargetStreamer() override;
virtual void emitTCEntry(const MCSymbol &S) = 0;
virtual void emitMachine(StringRef CPU) = 0;
virtual void emitAbiVersion(int AbiVersion) = 0;
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index dfe988f..01233ae 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -622,6 +622,25 @@ void foo() {
__asm__("" ::: "cr2");
}
+//===-------------------------------------------------------------------------===
+The naming convention for instruction formats is very haphazard.
+We have agreed on a naming scheme as follows:
+
+<INST_form>{_<OP_type><OP_len>}+
+
+Where:
+INST_form is the instruction format (X-form, etc.)
+OP_type is the operand type - one of OPC (opcode), RD (register destination),
+ RS (register source),
+ RDp (destination register pair),
+ RSp (source register pair), IM (immediate),
+ XO (extended opcode)
+OP_len is the length of the operand in bits
+
+VSX register operands would be of length 6 (split across two fields),
+and condition register fields of length 3.
+We would not need to denote reserved fields in names of instruction formats.
+
//===----------------------------------------------------------------------===//
Instruction fusion was introduced in ISA 2.06 and more opportunities added in
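For illustration only: under this scheme a plain X-form instruction with a 6-bit opcode, a 5-bit destination register, two 5-bit source registers and a 10-bit extended opcode (with the Rc bit left undenoted as a reserved field) could be named XForm_OPC6_RD5_RS5_RS5_XO10. None of these names exist in the .td files yet; this is just how the convention would be applied.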
diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
index 43d87d3..1d5b092 100644
--- a/lib/Target/PowerPC/README_ALTIVEC.txt
+++ b/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -277,7 +277,7 @@ This will generate the following instruction sequence:
This will almost certainly cause a load-hit-store hazard.
Since val is a value parameter, it should not need to be saved onto
the stack, unless it's being done to set up the vector register. Instead,
-it would be better to splat teh value into a vector register, and then
+it would be better to splat the value into a vector register, and then
remove the (dead) stores to the stack.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index e5d5ce2..2eb805e 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -133,6 +133,20 @@ class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
!cast<string>(Value),
"The size of local memory in bytes">;
+def FeatureGCN : SubtargetFeature<"gcn",
+ "IsGCN",
+ "true",
+ "GCN or newer GPU">;
+
+def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
+ "GCN1Encoding",
+ "true",
+ "Encoding format for SI and CI">;
+
+def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
+ "GCN3Encoding",
+ "true",
+ "Encoding format for VI">;
class SubtargetFeatureGeneration <string Value,
list<SubtargetFeature> Implies> :
SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
@@ -158,15 +172,17 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
- FeatureWavefrontSize64]>;
+ FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding]>;
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
+ FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
+ FeatureGCN1Encoding]>;
def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
+ FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
+ FeatureGCN3Encoding]>;
//===----------------------------------------------------------------------===//
@@ -197,8 +213,10 @@ def NullALU : InstrItinClass;
class PredicateControl {
Predicate SubtargetPredicate;
+ list<Predicate> AssemblerPredicates = [];
list<Predicate> OtherPredicates = [];
list<Predicate> Predicates = !listconcat([SubtargetPredicate],
+ AssemblerPredicates,
OtherPredicates);
}
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index d911014..b3480b4 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -17,6 +17,7 @@
//
#include "AMDGPUAsmPrinter.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "AMDGPUSubtarget.h"
@@ -574,3 +575,24 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header)));
}
+
+bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'r':
+ break;
+ }
+ }
+
+ AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
+ *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
+ return false;
+}
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index 58ffb1e..1acff3a 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -99,6 +99,10 @@ public:
void EmitEndOfAsmFile(Module &M) override;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+
protected:
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 7341cd9..def252a 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -345,7 +345,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned NOps = N->getNumOperands();
for (unsigned i = 0; i < NOps; i++) {
// XXX: Why is this here?
- if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
+ if (isa<RegisterSDNode>(N->getOperand(i))) {
IsRegSeq = false;
break;
}
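Several hunks in this patch (here, and in AMDGPUInstructions.td, R600ISelLowering.cpp and R600TextureIntrinsicsReplacer.cpp below) replace dyn_cast<> with isa<> or cast<> where the pointer result was either unused or assumed non-null. A short sketch of the three idioms on the SelectionDAG types involved (the helper function is illustrative only):

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  #include "llvm/Support/Casting.h"

  using namespace llvm;

  static void castingIdioms(SDNode *N, SDValue Op) {
    // isa<>: just answers the question; right when the pointer isn't needed.
    bool IsReg = isa<RegisterSDNode>(Op);
    (void)IsReg;

    // cast<>: asserts the type; right when a mismatch would be a bug.
    unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
    (void)AS;

    // dyn_cast<>: returns null on mismatch; right only when you branch on it.
    if (const auto *R = dyn_cast<RegisterSDNode>(Op))
      (void)R->getReg();
  }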
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 62a33fa..7c5235d 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -126,6 +126,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Custom);
setOperationAction(ISD::FROUND, MVT::f64, Custom);
@@ -1685,14 +1687,8 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
const unsigned bitPos = halfBitWidth - i - 1;
SDValue POS = DAG.getConstant(bitPos, HalfVT);
// Get value of high bit
- // TODO: Remove the BFE part when the optimization is fixed
- SDValue HBit;
- if (halfBitWidth == 32 && Subtarget->hasBFE()) {
- HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
- } else {
- HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
- HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
- }
+ SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
+ HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
// Shift
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 4d08201..eeb7f3f 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -358,7 +358,7 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
@@ -389,7 +389,7 @@ def flat_store : PatFrag<(ops node:$val, node:$ptr),
def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;
class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
index f047ed0..7e274a9 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -124,7 +124,8 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(),
*MF->getSubtarget().getInstrInfo(),
*MF->getSubtarget().getRegisterInfo());
- InstPrinter.printInst(&TmpInst, DisasmStream, StringRef());
+ InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(),
+ MF->getSubtarget());
// Disassemble instruction/operands to hex representation.
SmallVector<MCFixup, 4> Fixups;
diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
index 175dcd8..6d5f94e 100644
--- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -366,8 +366,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
Function *F = Call->getCalledFunction();
FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
F->isVarArg());
- Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType,
- F->getAttributes());
+ Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
+ NewType, F->getAttributes());
Function *NewF = cast<Function>(C);
Call->setCalledFunction(NewF);
continue;
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 0ead652..259224a 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -71,6 +71,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false),
+ IsGCN(false), GCN1Encoding(false), GCN3Encoding(false),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 403a3e4..aeb0817 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -71,6 +71,9 @@ private:
int LocalMemorySize;
bool EnableVGPRSpilling;
bool SGPRInitBug;
+ bool IsGCN;
+ bool GCN1Encoding;
+ bool GCN3Encoding;
AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index ee6551b..c9b25a1 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -623,7 +623,7 @@ DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
++It) {
MachineInstr *instr = &(*It);
- if (!instr->getDebugLoc().isUnknown())
+ if (instr->getDebugLoc())
DL = instr->getDebugLoc();
}
return DL;
diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
index 49f0f23..aaf9b32 100644
--- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -27,76 +29,105 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {
-class AMDGPUAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
- MCAsmParser &Parser;
-
-
- /// @name Auto-generated Match Functions
- /// {
-
-#define GET_ASSEMBLER_HEADER
-#include "AMDGPUGenAsmMatcher.inc"
-
- /// }
-
-public:
- AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
- const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(STI), Parser(Parser) {
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
- }
- bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- OperandVector &Operands, MCStreamer &Out,
- uint64_t &ErrorInfo,
- bool MatchingInlineAsm) override;
- bool ParseDirective(AsmToken DirectiveID) override;
- OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc, OperandVector &Operands) override;
-
- bool parseCnt(int64_t &IntVal);
- OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
-};
+struct OptionalOperand;
class AMDGPUOperand : public MCParsedAsmOperand {
enum KindTy {
Token,
- Immediate
+ Immediate,
+ Register,
+ Expression
} Kind;
+ SMLoc StartLoc, EndLoc;
+
public:
AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ MCContext *Ctx;
+
+ enum ImmTy {
+ ImmTyNone,
+ ImmTyDSOffset0,
+ ImmTyDSOffset1,
+ ImmTyGDS,
+ ImmTyOffset,
+ ImmTyGLC,
+ ImmTySLC,
+ ImmTyTFE,
+ ImmTyClamp,
+ ImmTyOMod
+ };
+
struct TokOp {
const char *Data;
unsigned Length;
};
struct ImmOp {
+ bool IsFPImm;
+ ImmTy Type;
int64_t Val;
};
+ struct RegOp {
+ unsigned RegNo;
+ int Modifiers;
+ const MCRegisterInfo *TRI;
+ };
+
union {
TokOp Tok;
ImmOp Imm;
+ RegOp Reg;
+ const MCExpr *Expr;
};
void addImmOperands(MCInst &Inst, unsigned N) const {
Inst.addOperand(MCOperand::CreateImm(getImm()));
}
- void addRegOperands(MCInst &Inst, unsigned N) const {
- llvm_unreachable("addRegOperands");
- }
+
StringRef getToken() const {
return StringRef(Tok.Data, Tok.Length);
}
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
+ if (isReg())
+ addRegOperands(Inst, N);
+ else
+ addImmOperands(Inst, N);
+ }
+
+ void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateImm(Reg.Modifiers));
+ addRegOperands(Inst, N);
+ }
+
+ void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
+ if (isImm())
+ addImmOperands(Inst, N);
+ else {
+ assert(isExpr());
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+ }
+
+ bool defaultTokenHasSuffix() const {
+ StringRef Token(Tok.Data, Tok.Length);
+
+ return Token.endswith("_e32") || Token.endswith("_e64");
+ }
+
bool isToken() const override {
return Kind == Token;
}
@@ -105,52 +136,369 @@ public:
return Kind == Immediate;
}
+ bool isInlineImm() const {
+ float F = BitsToFloat(Imm.Val);
+ // TODO: Add 0.5pi for VI
+ return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) ||
+ (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
+ F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0));
+ }
+
+ bool isDSOffset0() const {
+ assert(isImm());
+ return Imm.Type == ImmTyDSOffset0;
+ }
+
+ bool isDSOffset1() const {
+ assert(isImm());
+ return Imm.Type == ImmTyDSOffset1;
+ }
+
int64_t getImm() const {
return Imm.Val;
}
+ enum ImmTy getImmTy() const {
+ assert(isImm());
+ return Imm.Type;
+ }
+
bool isReg() const override {
- return false;
+ return Kind == Register && Reg.Modifiers == -1;
+ }
+
+ bool isRegWithInputMods() const {
+ return Kind == Register && Reg.Modifiers != -1;
+ }
+
+ void setModifiers(unsigned Mods) {
+ assert(isReg());
+ Reg.Modifiers = Mods;
}
unsigned getReg() const override {
- return 0;
+ return Reg.RegNo;
+ }
+
+ bool isRegOrImm() const {
+ return isReg() || isImm();
+ }
+
+ bool isRegClass(unsigned RCID) const {
+ return Reg.TRI->getRegClass(RCID).contains(getReg());
+ }
+
+ bool isSCSrc32() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+ }
+
+ bool isSSrc32() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+ }
+
+ bool isSSrc64() const {
+ return isImm() || isInlineImm() ||
+ (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
+ }
+
+ bool isVCSrc32() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVCSrc64() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
+ }
+
+ bool isVSrc32() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVSrc64() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
}
bool isMem() const override {
return false;
}
+ bool isExpr() const {
+ return Kind == Expression;
+ }
+
+ bool isSoppBrTarget() const {
+ return isExpr() || isImm();
+ }
+
SMLoc getStartLoc() const override {
- return SMLoc();
+ return StartLoc;
}
SMLoc getEndLoc() const override {
- return SMLoc();
+ return EndLoc;
}
void print(raw_ostream &OS) const override { }
- static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) {
+ static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
+ enum ImmTy Type = ImmTyNone,
+ bool IsFPImm = false) {
auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
Op->Imm.Val = Val;
+ Op->Imm.IsFPImm = IsFPImm;
+ Op->Imm.Type = Type;
+ Op->StartLoc = Loc;
+ Op->EndLoc = Loc;
return Op;
}
- static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) {
+ static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc,
+ bool HasExplicitEncodingSize = true) {
auto Res = llvm::make_unique<AMDGPUOperand>(Token);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
+ Res->StartLoc = Loc;
+ Res->EndLoc = Loc;
return Res;
}
+ static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
+ SMLoc E,
+ const MCRegisterInfo *TRI) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Register);
+ Op->Reg.RegNo = RegNo;
+ Op->Reg.TRI = TRI;
+ Op->Reg.Modifiers = -1;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
+ Op->Expr = Expr;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ bool isDSOffset() const;
+ bool isDSOffset01() const;
bool isSWaitCnt() const;
+ bool isMubufOffset() const;
};
+class AMDGPUAsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ const MCInstrInfo &MII;
+ MCAsmParser &Parser;
+
+ unsigned ForcedEncodingSize;
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "AMDGPUGenAsmMatcher.inc"
+
+ /// }
+
+public:
+ AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser),
+ ForcedEncodingSize(0){
+
+ if (!STI.getFeatureBits()) {
+ // Set default features.
+ STI.ToggleFeature("SOUTHERN_ISLANDS");
+ }
+
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ unsigned getForcedEncodingSize() const {
+ return ForcedEncodingSize;
+ }
+
+ void setForcedEncodingSize(unsigned Size) {
+ ForcedEncodingSize = Size;
+ }
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+ bool ParseDirective(AsmToken DirectiveID) override;
+ OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int,
+ int64_t Default = 0);
+ OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
+ OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy =
+ AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy =
+ AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseOptionalOps(
+ const ArrayRef<OptionalOperand> &OptionalOps,
+ OperandVector &Operands);
+
+
+ void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
+ void cvtDS(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands);
+
+ bool parseCnt(int64_t &IntVal);
+ OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
+ OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
+
+ void cvtMubuf(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseOffset(OperandVector &Operands);
+ OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseGLC(OperandVector &Operands);
+ OperandMatchResultTy parseSLC(OperandVector &Operands);
+ OperandMatchResultTy parseTFE(OperandVector &Operands);
+
+ OperandMatchResultTy parseDMask(OperandVector &Operands);
+ OperandMatchResultTy parseUNorm(OperandVector &Operands);
+ OperandMatchResultTy parseR128(OperandVector &Operands);
+
+ void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands);
+};
+
+struct OptionalOperand {
+ const char *Name;
+ AMDGPUOperand::ImmTy Type;
+ bool IsBit;
+ int64_t Default;
+ bool (*ConvertResult)(int64_t&);
+};
+
+}
+
+static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
+ if (IsVgpr) {
+ switch (RegWidth) {
+ default: llvm_unreachable("Unknown register width");
+ case 1: return AMDGPU::VGPR_32RegClassID;
+ case 2: return AMDGPU::VReg_64RegClassID;
+ case 3: return AMDGPU::VReg_96RegClassID;
+ case 4: return AMDGPU::VReg_128RegClassID;
+ case 8: return AMDGPU::VReg_256RegClassID;
+ case 16: return AMDGPU::VReg_512RegClassID;
+ }
+ }
+
+ switch (RegWidth) {
+ default: llvm_unreachable("Unknown register width");
+ case 1: return AMDGPU::SGPR_32RegClassID;
+ case 2: return AMDGPU::SGPR_64RegClassID;
+ case 4: return AMDGPU::SReg_128RegClassID;
+ case 8: return AMDGPU::SReg_256RegClassID;
+ case 16: return AMDGPU::SReg_512RegClassID;
+ }
+}
+
+static unsigned getRegForName(const StringRef &RegName) {
+
+ return StringSwitch<unsigned>(RegName)
+ .Case("exec", AMDGPU::EXEC)
+ .Case("vcc", AMDGPU::VCC)
+ .Case("flat_scr", AMDGPU::FLAT_SCR)
+ .Case("m0", AMDGPU::M0)
+ .Case("scc", AMDGPU::SCC)
+ .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO)
+ .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI)
+ .Case("vcc_lo", AMDGPU::VCC_LO)
+ .Case("vcc_hi", AMDGPU::VCC_HI)
+ .Case("exec_lo", AMDGPU::EXEC_LO)
+ .Case("exec_hi", AMDGPU::EXEC_HI)
+ .Default(0);
}
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
- return true;
+ const AsmToken Tok = Parser.getTok();
+ StartLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
+ const StringRef &RegName = Tok.getString();
+ RegNo = getRegForName(RegName);
+
+ if (RegNo) {
+ Parser.Lex();
+ return false;
+ }
+
+ // Match vgprs and sgprs
+ if (RegName[0] != 's' && RegName[0] != 'v')
+ return true;
+
+ bool IsVgpr = RegName[0] == 'v';
+ unsigned RegWidth;
+ unsigned RegIndexInClass;
+ if (RegName.size() > 1) {
+ // We have a 32-bit register
+ RegWidth = 1;
+ if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
+ return true;
+ Parser.Lex();
+ } else {
+ // We have a register greater than 32-bits.
+
+ int64_t RegLo, RegHi;
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::LBrac))
+ return true;
+
+ Parser.Lex();
+ if (getParser().parseAbsoluteExpression(RegLo))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Colon))
+ return true;
+
+ Parser.Lex();
+ if (getParser().parseAbsoluteExpression(RegHi))
+ return true;
+
+ if (getLexer().isNot(AsmToken::RBrac))
+ return true;
+
+ Parser.Lex();
+ RegWidth = (RegHi - RegLo) + 1;
+ if (IsVgpr) {
+ // VGPR registers aren't aligned.
+ RegIndexInClass = RegLo;
+ } else {
+ // SGPR registers are aligned. Max alignment is 4 dwords.
+ RegIndexInClass = RegLo / std::min(RegWidth, 4u);
+ }
+ }
+
+ const MCRegisterInfo *TRC = getContext().getRegisterInfo();
+ unsigned RC = getRegClass(IsVgpr, RegWidth);
+ if (RegIndexInClass > TRC->getRegClass(RC).getNumRegs())
+ return true;
+ RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
+ return false;
+}
+
+unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+
+ if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
+ (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
+ return Match_InvalidOperand;
+
+ return Match_Success;
}
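The new ParseRegister above accepts both single registers ("v7", "s2") and ranges ("v[2:3]", "s[4:7]"), mapping a range to an index in the matching tuple register class; VGPR tuples are unaligned while SGPR tuples are aligned to at most four dwords. A worked sketch of that index computation (the helper name is ours, not part of the parser):

  #include <algorithm>

  // "v[2:3]"  -> VGPR, RegLo 2, width 2 -> index 2 in VReg_64
  // "s[4:7]"  -> SGPR, RegLo 4, width 4 -> index 4 / min(4,4) = 1 in SReg_128
  // "s[8:15]" -> SGPR, RegLo 8, width 8 -> index 8 / min(8,4) = 2 in SReg_256
  static unsigned regIndexInClass(bool IsVgpr, unsigned RegLo, unsigned RegWidth) {
    if (IsVgpr)
      return RegLo;                          // VGPR registers aren't aligned
    return RegLo / std::min(RegWidth, 4u);   // SGPR tuples align to <= 4 dwords
  }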
@@ -162,22 +510,30 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MCInst Inst;
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
- case Match_Success:
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
- return false;
- case Match_MissingFeature:
- return Error(IDLoc, "instruction use requires an option to be enabled");
- case Match_MnemonicFail:
- return Error(IDLoc, "unrecognized instruction mnemonic");
- case Match_InvalidOperand: {
- if (ErrorInfo != ~0ULL) {
- if (ErrorInfo >= Operands.size())
- return Error(IDLoc, "too few operands for instruction");
+ default: break;
+ case Match_Success:
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, STI);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "missing feature");
+
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size()) {
+ return Error(IDLoc, "too few operands for instruction");
+ }
+ ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
}
- return Error(IDLoc, "invalid operand for instruction");
- }
}
llvm_unreachable("Implement any new match types added!");
}
@@ -186,6 +542,19 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
return true;
}
+static bool operandsHaveModifiers(const OperandVector &Operands) {
+
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
+ if (Op.isRegWithInputMods())
+ return true;
+ if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod ||
+ Op.getImmTy() == AMDGPUOperand::ImmTyClamp))
+ return true;
+ }
+ return false;
+}
+
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
@@ -194,17 +563,104 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// If we successfully parsed the operand or if there as an error parsing,
// we are done.
- if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
+ //
+ // If we are parsing after we reach EndOfStatement then this means we
+ // are appending default values to the Operands list. This is only done
+ // by custom parser, so we shouldn't continue on to the generic parsing.
+ if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
+ getLexer().is(AsmToken::EndOfStatement))
return ResTy;
+ bool Negate = false, Abs = false;
+ if (getLexer().getKind()== AsmToken::Minus) {
+ Parser.Lex();
+ Negate = true;
+ }
+
+ if (getLexer().getKind() == AsmToken::Pipe) {
+ Parser.Lex();
+ Abs = true;
+ }
+
switch(getLexer().getKind()) {
case AsmToken::Integer: {
+ SMLoc S = Parser.getTok().getLoc();
int64_t IntVal;
if (getParser().parseAbsoluteExpression(IntVal))
return MatchOperand_ParseFail;
- Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
+ APInt IntVal32(32, IntVal);
+ if (IntVal32.getSExtValue() != IntVal) {
+ Error(S, "invalid immediate: only 32-bit values are legal");
+ return MatchOperand_ParseFail;
+ }
+
+ IntVal = IntVal32.getSExtValue();
+ if (Negate)
+ IntVal *= -1;
+ Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
return MatchOperand_Success;
}
+ case AsmToken::Real: {
+ // FIXME: We should emit an error if a double precisions floating-point
+ // value is used. I'm not sure the best way to detect this.
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t IntVal;
+ if (getParser().parseAbsoluteExpression(IntVal))
+ return MatchOperand_ParseFail;
+
+ APFloat F((float)BitsToDouble(IntVal));
+ if (Negate)
+ F.changeSign();
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
+ return MatchOperand_Success;
+ }
+ case AsmToken::Identifier: {
+ SMLoc S, E;
+ unsigned RegNo;
+ if (!ParseRegister(RegNo, S, E)) {
+
+ bool HasModifiers = operandsHaveModifiers(Operands);
+ unsigned Modifiers = 0;
+
+ if (Negate)
+ Modifiers |= 0x1;
+
+ if (Abs) {
+ if (getLexer().getKind() != AsmToken::Pipe)
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+ Modifiers |= 0x2;
+ }
+
+ if (Modifiers && !HasModifiers) {
+ // We are adding a modifier to src1 or src2 and previous sources
+ // don't have modifiers, so we need to go back and empty modifiers
+ // for each previous source.
+ for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1;
+ --PrevRegIdx) {
+
+ AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]);
+ RegOp.setModifiers(0);
+ }
+ }
+
+
+ Operands.push_back(AMDGPUOperand::CreateReg(
+ RegNo, S, E, getContext().getRegisterInfo()));
+
+ if (HasModifiers || Modifiers) {
+ AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]);
+ RegOp.setModifiers(Modifiers);
+
+ }
+ } else {
+ Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(),
+ S));
+ Parser.Lex();
+ }
+ return MatchOperand_Success;
+ }
default:
return MatchOperand_NoMatch;
}
@@ -213,22 +669,282 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
+
+ // Clear any forced encodings from the previous instruction.
+ setForcedEncodingSize(0);
+
+ if (Name.endswith("_e64"))
+ setForcedEncodingSize(64);
+ else if (Name.endswith("_e32"))
+ setForcedEncodingSize(32);
+
// Add the instruction mnemonic
Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
- if (getLexer().is(AsmToken::EndOfStatement))
- return false;
+ while (!getLexer().is(AsmToken::EndOfStatement)) {
+ AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
+
+ // Eat the comma or space if there is one.
+ if (getLexer().is(AsmToken::Comma))
+ Parser.Lex();
- AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
- switch (Res) {
- case MatchOperand_Success: return false;
- case MatchOperand_ParseFail: return Error(NameLoc,
- "Failed parsing operand");
- case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand");
+ switch (Res) {
+ case MatchOperand_Success: break;
+ case MatchOperand_ParseFail: return Error(getLexer().getLoc(),
+ "failed parsing operand.");
+ case MatchOperand_NoMatch: return Error(getLexer().getLoc(),
+ "not a valid operand.");
+ }
}
- return true;
+
+ // Once we reach end of statement, continue parsing so we can add default
+ // values for optional arguments.
+ AMDGPUAsmParser::OperandMatchResultTy Res;
+ while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) {
+ if (Res != MatchOperand_Success)
+ return Error(getLexer().getLoc(), "failed parsing operand.");
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Utility functions
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int,
+ int64_t Default) {
+
+ // We are at the end of the statement, and this is a default argument, so
+ // use a default value.
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ Int = Default;
+ return MatchOperand_Success;
+ }
+
+ switch(getLexer().getKind()) {
+ default: return MatchOperand_NoMatch;
+ case AsmToken::Identifier: {
+ StringRef OffsetName = Parser.getTok().getString();
+ if (!OffsetName.equals(Prefix))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Colon))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Integer))
+ return MatchOperand_ParseFail;
+
+ if (getParser().parseAbsoluteExpression(Int))
+ return MatchOperand_ParseFail;
+ break;
+ }
+ }
+ return MatchOperand_Success;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy) {
+
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t Offset = 0;
+
+ AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset);
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy));
+ return MatchOperand_Success;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy) {
+ int64_t Bit = 0;
+ SMLoc S = Parser.getTok().getLoc();
+
+ // We are at the end of the statement, and this is a default argument, so
+ // use a default value.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ switch(getLexer().getKind()) {
+ case AsmToken::Identifier: {
+ StringRef Tok = Parser.getTok().getString();
+ if (Tok == Name) {
+ Bit = 1;
+ Parser.Lex();
+ } else if (Tok.startswith("no") && Tok.endswith(Name)) {
+ Bit = 0;
+ Parser.Lex();
+ } else {
+ return MatchOperand_NoMatch;
+ }
+ break;
+ }
+ default:
+ return MatchOperand_NoMatch;
+ }
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
+ return MatchOperand_Success;
+}
+
+static bool operandsHasOptionalOp(const OperandVector &Operands,
+ const OptionalOperand &OOp) {
+ for (unsigned i = 0; i < Operands.size(); i++) {
+ const AMDGPUOperand &ParsedOp = ((const AMDGPUOperand &)*Operands[i]);
+ if ((ParsedOp.isImm() && ParsedOp.getImmTy() == OOp.Type) ||
+ (ParsedOp.isToken() && ParsedOp.getToken() == OOp.Name))
+ return true;
+
+ }
+ return false;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps,
+ OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ for (const OptionalOperand &Op : OptionalOps) {
+ if (operandsHasOptionalOp(Operands, Op))
+ continue;
+ AMDGPUAsmParser::OperandMatchResultTy Res;
+ int64_t Value;
+ if (Op.IsBit) {
+ Res = parseNamedBit(Op.Name, Operands, Op.Type);
+ if (Res == MatchOperand_NoMatch)
+ continue;
+ return Res;
+ }
+
+ Res = parseIntWithPrefix(Op.Name, Value, Op.Default);
+
+ if (Res == MatchOperand_NoMatch)
+ continue;
+
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ if (Op.ConvertResult && !Op.ConvertResult(Value)) {
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type));
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
+//===----------------------------------------------------------------------===//
+// ds
+//===----------------------------------------------------------------------===//
+
+static const OptionalOperand DSOptionalOps [] = {
+ {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
+ {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
+};
+
+static const OptionalOperand DSOptionalOpsOff01 [] = {
+ {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr},
+ {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr},
+ {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
+};
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(DSOptionalOps, Operands);
+}
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(DSOptionalOpsOff01, Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ AMDGPUAsmParser::OperandMatchResultTy Res =
+ parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
+ if (Res == MatchOperand_NoMatch) {
+ Operands.push_back(AMDGPUOperand::CreateImm(0, S,
+ AMDGPUOperand::ImmTyOffset));
+ Res = MatchOperand_Success;
+ }
+ return Res;
+}
+
+bool AMDGPUOperand::isDSOffset() const {
+ return isImm() && isUInt<16>(getImm());
+}
+
+bool AMDGPUOperand::isDSOffset01() const {
+ return isImm() && isUInt<8>(getImm());
+}
+
+void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
+ const OperandVector &Operands) {
+
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0];
+ unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1];
+ unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
+
+ ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0
+ ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1
+ ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
+ Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0
}
+void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
+
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+ bool GDSOnly = false;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ if (Op.isToken() && Op.getToken() == "gds") {
+ GDSOnly = true;
+ continue;
+ }
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
+ ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset
+
+ if (!GDSOnly) {
+ unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
+ ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
+ }
+ Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0
+}
+
+
//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//
@@ -283,6 +999,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
// expcnt [6:4]
// lgkmcnt [10:8]
int64_t CntVal = 0x77f;
+ SMLoc S = Parser.getTok().getLoc();
switch(getLexer().getKind()) {
default: return MatchOperand_ParseFail;
@@ -299,7 +1016,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
} while(getLexer().isNot(AsmToken::EndOfStatement));
break;
}
- Operands.push_back(AMDGPUOperand::CreateImm(CntVal));
+ Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
return MatchOperand_Success;
}
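The default of 0x77f above is simply every counter at its maximum, i.e. "wait for nothing". A small sketch of how the three fields named in the comments pack into the immediate (the helper is illustrative, not an LLVM API):

  #include <cassert>
  #include <cstdint>

  // vmcnt lives in bits [3:0], expcnt in [6:4], lgkmcnt in [10:8].
  static uint16_t encodeWaitcnt(unsigned VMCnt, unsigned ExpCnt, unsigned LgkmCnt) {
    assert(VMCnt < 16 && ExpCnt < 8 && LgkmCnt < 8 && "field out of range");
    return (LgkmCnt << 8) | (ExpCnt << 4) | VMCnt;
  }
  // encodeWaitcnt(15, 7, 7) == 0x77f, the parser's default.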
@@ -307,6 +1024,245 @@ bool AMDGPUOperand::isSWaitCnt() const {
return isImm();
}
+//===----------------------------------------------------------------------===//
+// sopp branch targets
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ switch (getLexer().getKind()) {
+ default: return MatchOperand_ParseFail;
+ case AsmToken::Integer: {
+ int64_t Imm;
+ if (getParser().parseAbsoluteExpression(Imm))
+ return MatchOperand_ParseFail;
+ Operands.push_back(AMDGPUOperand::CreateImm(Imm, S));
+ return MatchOperand_Success;
+ }
+
+ case AsmToken::Identifier:
+ Operands.push_back(AMDGPUOperand::CreateExpr(
+ MCSymbolRefExpr::Create(getContext().GetOrCreateSymbol(
+ Parser.getTok().getString()), getContext()), S));
+ Parser.Lex();
+ return MatchOperand_Success;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// mubuf
+//===----------------------------------------------------------------------===//
+
+static const OptionalOperand MubufOptionalOps [] = {
+ {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
+ {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
+ {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr},
+ {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
+};
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(MubufOptionalOps, Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOffset(OperandVector &Operands) {
+ return parseIntWithPrefix("offset", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseGLC(OperandVector &Operands) {
+ return parseNamedBit("glc", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSLC(OperandVector &Operands) {
+ return parseNamedBit("slc", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseTFE(OperandVector &Operands) {
+ return parseNamedBit("tfe", Operands);
+}
+
+bool AMDGPUOperand::isMubufOffset() const {
+ return isImm() && isUInt<12>(getImm());
+}
+
+void AMDGPUAsmParser::cvtMubuf(MCInst &Inst,
+ const OperandVector &Operands) {
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle the case where soffset is an immediate
+ if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+ Op.addImmOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle tokens like 'offen' which are sometimes hard-coded into the
+ // asm string. There are no MCInst operands for these.
+ if (Op.isToken()) {
+ continue;
+ }
+ assert(Op.isImm());
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ assert(OptionalIdx.size() == 4);
+
+ unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
+ unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC];
+ unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC];
+ unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE];
+
+ ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1);
+}
+
+//===----------------------------------------------------------------------===//
+// mimg
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDMask(OperandVector &Operands) {
+ return parseIntWithPrefix("dmask", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseUNorm(OperandVector &Operands) {
+ return parseNamedBit("unorm", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseR128(OperandVector &Operands) {
+ return parseNamedBit("r128", Operands);
+}
+
+//===----------------------------------------------------------------------===//
+// vop3
+//===----------------------------------------------------------------------===//
+
+static bool ConvertOmodMul(int64_t &Mul) {
+ if (Mul != 1 && Mul != 2 && Mul != 4)
+ return false;
+
+ Mul >>= 1;
+ return true;
+}
+
+static bool ConvertOmodDiv(int64_t &Div) {
+ if (Div == 1) {
+ Div = 0;
+ return true;
+ }
+
+ if (Div == 2) {
+ Div = 3;
+ return true;
+ }
+
+ return false;
+}
+
+static const OptionalOperand VOP3OptionalOps [] = {
+ {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr},
+ {"mul", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul},
+ {"div", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv},
+};
+
+static bool isVOP3(OperandVector &Operands) {
+ if (operandsHaveModifiers(Operands))
+ return true;
+
+ AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]);
+
+ if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID))
+ return true;
+
+ if (Operands.size() >= 5)
+ return true;
+
+ if (Operands.size() > 3) {
+ AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]);
+ if (Src1Op.getReg() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) ||
+ Src1Op.isRegClass(AMDGPU::SReg_64RegClassID)))
+ return true;
+ }
+ return false;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
+
+ // The value returned by this function may change after parsing
+ // an operand so store the original value here.
+ bool HasModifiers = operandsHaveModifiers(Operands);
+
+ bool IsVOP3 = isVOP3(Operands);
+ if (HasModifiers || IsVOP3 ||
+ getLexer().isNot(AsmToken::EndOfStatement) ||
+ getForcedEncodingSize() == 64) {
+
+ AMDGPUAsmParser::OperandMatchResultTy Res =
+ parseOptionalOps(VOP3OptionalOps, Operands);
+
+ if (!HasModifiers && Res == MatchOperand_Success) {
+ // We have added a modifier operation, so we need to make sure all
+ // previous register operands have modifiers
+ for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
+ if (Op.isReg())
+ Op.setModifiers(0);
+ }
+ }
+ return Res;
+ }
+ return MatchOperand_NoMatch;
+}
+
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
+ ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
+ unsigned i = 2;
+
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ if (operandsHaveModifiers(Operands)) {
+ for (unsigned e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ if (Op.isRegWithInputMods()) {
+ ((AMDGPUOperand &)*Operands[i]).addRegWithInputModsOperands(Inst, 2);
+ continue;
+ }
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ unsigned ClampIdx = OptionalIdx[AMDGPUOperand::ImmTyClamp];
+ unsigned OModIdx = OptionalIdx[AMDGPUOperand::ImmTyOMod];
+
+ ((AMDGPUOperand &)*Operands[ClampIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[OModIdx]).addImmOperands(Inst, 1);
+ } else {
+ for (unsigned e = Operands.size(); i != e; ++i)
+ ((AMDGPUOperand &)*Operands[i]).addRegOrImmOperands(Inst, 1);
+ }
+}
+
/// Force static initialization.
extern "C" void LLVMInitializeR600AsmParser() {
RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
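One detail worth spelling out from the VOP3 handling above: ConvertOmodMul and ConvertOmodDiv turn the user-visible "mul:N" / "div:N" values into the two-bit omod field, so no modifier (or mul:1 / div:1) encodes as 0, mul:2 as 1, mul:4 as 2, and div:2 as 3. A compact model of the mapping (the function names are ours):

  #include <cstdint>

  // omod field: 0 = none, 1 = *2, 2 = *4, 3 = /2
  static int64_t omodForMul(int64_t Mul) { return Mul >> 1; }         // 1,2,4 -> 0,1,2
  static int64_t omodForDiv(int64_t Div) { return Div == 2 ? 3 : 0; } // 2 -> 3, 1 -> 0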
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index d62fd3f..279c3eb 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
OS.flush();
printInstruction(MI, OS);
@@ -89,14 +89,18 @@ void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- O << " offset0:";
- printU8ImmDecOperand(MI, OpNo, O);
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset0:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
}
void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- O << " offset1:";
- printU8ImmDecOperand(MI, OpNo, O);
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset1:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
}
void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
@@ -123,7 +127,8 @@ void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
O << " tfe";
}
-void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
+void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
+ const MCRegisterInfo &MRI) {
switch (reg) {
case AMDGPU::VCC:
O << "vcc";
@@ -293,7 +298,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
break;
default:
- printRegOperand(Op.getReg(), O);
+ printRegOperand(Op.getReg(), O, MRI);
break;
}
} else if (Op.isImm()) {
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 5289718..14fb511 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -29,7 +29,10 @@ public:
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ static void printRegOperand(unsigned RegNo, raw_ostream &O,
+ const MCRegisterInfo &MRI);
private:
void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index d0c634f..f33e692 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -24,7 +24,7 @@ namespace {
class AMDGPUMCObjectWriter : public MCObjectWriter {
public:
- AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
+ AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {}
void ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override {
//XXX: Implement if necessary.
@@ -131,7 +131,7 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
public:
ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createAMDGPUELFObjectWriter(OS);
}
};
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 5fb94d5..59f45ff 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -33,7 +33,7 @@ protected:
AMDGPUELFObjectWriter::AMDGPUELFObjectWriter()
: MCELFObjectTargetWriter(false, 0, 0, false) { }
-MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_ostream &OS) {
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) {
MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter();
return createELFObjectWriter(MOTW, OS, true);
}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index fb2deef..7b280a4 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -64,12 +64,11 @@ static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
+static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new AMDGPUInstPrinter(MAI, MII, MRI);
}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index 23f0196..9a7548e 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -28,6 +28,7 @@ class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
+class raw_pwrite_stream;
class raw_ostream;
extern Target TheAMDGPUTarget;
@@ -44,7 +45,7 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS);
+MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS);
} // End llvm namespace
#define GET_REGINFO_ENUM
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index 760aa37..24f2b6d 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -49,7 +49,7 @@ public:
MCContext &ctx)
: MCII(mcii), MRI(mri), Ctx(ctx) { }
- ~SIMCCodeEmitter() { }
+ ~SIMCCodeEmitter() override {}
/// \brief Encode the instruction and write it to the OS.
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index a34e2dc..b6b7067 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -1811,7 +1811,7 @@ SDValue Swz[4], SelectionDAG &DAG) const {
BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
for (unsigned i = 0; i < 4; i++) {
- unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
+ unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
}
@@ -1819,7 +1819,7 @@ SDValue Swz[4], SelectionDAG &DAG) const {
SwizzleRemap.clear();
BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
for (unsigned i = 0; i < 4; i++) {
- unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
+ unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
}
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 291fb04..7126c82 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -683,6 +683,11 @@ def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
// TODO: Do these actually match the regular fmin/fmax behavior?
def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>;
def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>;
+// According to https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050%28v=vs.85%29.aspx,
+// DX10 min/max returns the other operand if one operand is NaN;
+// this matches http://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic.
+def MAX_DX10 : R600_2OP_Helper <0x5, "MAX_DX10", fmaxnum>;
+def MIN_DX10 : R600_2OP_Helper <0x6, "MIN_DX10", fminnum>;
// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
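
A minimal standalone C++ sketch (not part of the patch) of the NaN rule the comment above
refers to: fminnum/fmaxnum, like DX10 min/max and std::fmin/std::fmax, return the other
operand when exactly one operand is NaN.

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  float nan = std::numeric_limits<float>::quiet_NaN();
  assert(std::fmin(nan, 2.0f) == 2.0f); // NaN operand is dropped, not propagated
  assert(std::fmax(1.0f, nan) == 1.0f);
  assert(std::fmax(1.0f, 2.0f) == 2.0f); // normal case unchanged
  return 0;
}
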
diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
index 419ec8b..2fc7b02 100644
--- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
+++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
@@ -162,7 +162,7 @@ class R600TextureIntrinsicsReplacer :
Value *SamplerId = I.getArgOperand(2);
unsigned TextureType =
- dyn_cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
unsigned SrcSelect[4] = { 0, 1, 2, 3 };
unsigned CT[4] = {1, 1, 1, 1};
@@ -186,7 +186,7 @@ class R600TextureIntrinsicsReplacer :
Value *SamplerId = I.getArgOperand(5);
unsigned TextureType =
- dyn_cast<ConstantInt>(I.getArgOperand(6))->getZExtValue();
+ cast<ConstantInt>(I.getArgOperand(6))->getZExtValue();
unsigned SrcSelect[4] = { 0, 1, 2, 3 };
unsigned CT[4] = {1, 1, 1, 1};
diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp
index 79f6532..d39ab3f 100644
--- a/lib/Target/R600/SIAnnotateControlFlow.cpp
+++ b/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -83,7 +83,7 @@ class SIAnnotateControlFlow : public FunctionPass {
void insertElse(BranchInst *Term);
- Value *handleLoopCondition(Value *Cond, PHINode *Broken);
+ Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L);
void handleLoop(BranchInst *Term);
@@ -207,7 +207,8 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
/// \brief Recursively handle the condition leading to a loop
-Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) {
+Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
+ llvm::Loop *L) {
if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
BasicBlock *Parent = Phi->getParent();
PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
@@ -223,7 +224,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
}
Phi->setIncomingValue(i, BoolFalse);
- Value *PhiArg = handleLoopCondition(Incoming, Broken);
+ Value *PhiArg = handleLoopCondition(Incoming, Broken, L);
NewPhi->addIncoming(PhiArg, From);
}
@@ -253,7 +254,12 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
} else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
- TerminatorInst *Insert = Parent->getTerminator();
+ Instruction *Insert;
+ if (L->contains(Inst)) {
+ Insert = Parent->getTerminator();
+ } else {
+ Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
+ }
Value *Args[] = { Cond, Broken };
return CallInst::Create(IfBreak, Args, "", Insert);
@@ -265,14 +271,15 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
/// \brief Handle a back edge (loop)
void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
+ BasicBlock *BB = Term->getParent();
+ llvm::Loop *L = LI->getLoopFor(BB);
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
- Value *Arg = handleLoopCondition(Cond, Broken);
+ Value *Arg = handleLoopCondition(Cond, Broken, L);
- BasicBlock *BB = Term->getParent();
for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
PI != PE; ++PI) {
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index bd0c3c2..43507d8 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -76,8 +76,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setOperationAction(ISD::FSIN, MVT::f32, Custom);
setOperationAction(ISD::FCOS, MVT::f32, Custom);
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -2089,3 +2087,38 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
cast<RegisterSDNode>(VReg)->getReg(), VT);
}
+
+//===----------------------------------------------------------------------===//
+// SI Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass *>
+SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint == "r") {
+ switch(VT.SimpleTy) {
+ default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
+ case MVT::i64:
+ return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
+ case MVT::i32:
+ return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
+ }
+ }
+
+ if (Constraint.size() > 1) {
+ const TargetRegisterClass *RC = nullptr;
+ if (Constraint[1] == 'v') {
+ RC = &AMDGPU::VGPR_32RegClass;
+ } else if (Constraint[1] == 's') {
+ RC = &AMDGPU::SGPR_32RegClass;
+ }
+
+ if (RC) {
+ unsigned Idx = std::atoi(Constraint.substr(2).c_str());
+ if (Idx < RC->getNumRegs())
+ return std::make_pair(RC->getRegister(Idx), RC);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
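
The hook added above handles the generic "r" constraint (SGPR_64 for i64, SGPR_32 for i32)
plus specific-register constraints, presumably written as "{v5}" or "{s3}" in the inline-asm
constraint string, whose index is read with std::atoi from the characters after the
register-file letter. A standalone sketch of that string handling, with the register-class
names copied from the patch (illustration only, not an LLVM API):

#include <cstdlib>
#include <iostream>
#include <string>
#include <utility>

// Returns a register-class name and register index for constraints of the
// form "{vN}" / "{sN}"; anything else falls back to a default marker.
static std::pair<std::string, unsigned> classifyConstraint(const std::string &C) {
  if (C.size() > 1) {
    std::string RC;
    if (C[1] == 'v')
      RC = "VGPR_32";
    else if (C[1] == 's')
      RC = "SGPR_32";
    if (!RC.empty())
      return {RC, static_cast<unsigned>(std::atoi(C.substr(2).c_str()))};
  }
  return {"<default lowering>", 0};
}

int main() {
  auto R = classifyConstraint("{v5}");
  std::cout << R.first << " #" << R.second << "\n"; // prints: VGPR_32 #5
  return 0;
}
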
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 92f5847..a6bc7c6 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -113,6 +113,10 @@ public:
MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const;
+
+ std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI,
+ const std::string &Constraint, MVT VT) const override;
};
} // End namespace llvm
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 4167590..bc693c3 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -130,6 +130,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
let AddedComplexity = -1000;
let VOP3 = 1;
+ let VALU = 1;
+
+ let AsmMatchConverter = "cvtVOP3";
+ let isCodeGenOnly = 0;
+
int Size = 8;
}
@@ -181,6 +186,19 @@ class SOPKe <bits<5> op> : Enc32 {
let Inst{31-28} = 0xb; //encoding
}
+class SOPK64e <bits<5> op> : Enc64 {
+ bits <7> sdst = 0;
+ bits <16> simm16;
+ bits <32> imm;
+
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = sdst;
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb;
+
+ let Inst{63-32} = imm;
+}
+
class SOPPe <bits<7> op> : Enc32 {
bits <16> simm16;
@@ -208,6 +226,7 @@ class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let isCodeGenOnly = 0;
let SALU = 1;
let SOP1 = 1;
}
@@ -218,6 +237,7 @@ class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let isCodeGenOnly = 0;
let SALU = 1;
let SOP2 = 1;
@@ -233,6 +253,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let hasSideEffects = 0;
let SALU = 1;
let SOPC = 1;
+ let isCodeGenOnly = 0;
let UseNamedOperandTable = 1;
}
@@ -550,10 +571,14 @@ let Uses = [EXEC] in {
class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
VOP1Common <outs, ins, asm, pattern>,
- VOP1e<op>;
+ VOP1e<op> {
+ let isCodeGenOnly = 0;
+}
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
+ VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
+ let isCodeGenOnly = 0;
+}
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
VOPCCommon <ins, asm, pattern>, VOPCe <op>;
@@ -586,6 +611,7 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
let mayStore = 1;
let hasSideEffects = 0;
+ let AsmMatchConverter = "cvtDS";
let SchedRW = [WriteLDS];
}
@@ -598,6 +624,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
+ let AsmMatchConverter = "cvtMubuf";
let SchedRW = [WriteVMEM];
}
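
For reference, the SOPK64e class added earlier in this file lays out a 64-bit SOPK
encoding (used below for s_setreg_imm32_b32): simm16 in bits 15-0, sdst in 22-16, the
5-bit opcode in 27-23, the fixed 0xb marker in 31-28, and a 32-bit literal in 63-32.
A rough standalone C++ sketch of that packing (illustration only; the real encoder is
generated by TableGen):

#include <cstdint>
#include <cstdio>

static uint64_t encodeSOPK64(uint64_t op, uint64_t sdst, uint64_t simm16,
                             uint64_t imm) {
  uint64_t Inst = 0;
  Inst |= (simm16 & 0xffff);              // Inst{15-0}  = simm16
  Inst |= (sdst & 0x7f) << 16;            // Inst{22-16} = sdst
  Inst |= (op & 0x1f) << 23;              // Inst{27-23} = op
  Inst |= UINT64_C(0xb) << 28;            // Inst{31-28} = 0xb
  Inst |= (imm & 0xffffffff) << 32;       // Inst{63-32} = imm
  return Inst;
}

int main() {
  // Hypothetical operand values, just to show the field layout.
  std::printf("0x%016llx\n",
              (unsigned long long)encodeSOPK64(0x15, 0, 0, 0xdeadbeef));
  return 0;
}
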
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index d603ecb..076a0ce 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -6,6 +6,15 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+def isSICI : Predicate<
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
+>, AssemblerPredicate<"FeatureGCN1Encoding">;
+def isCI : Predicate<"Subtarget->getGeneration() "
+ ">= AMDGPUSubtarget::SEA_ISLANDS">;
+def isVI : Predicate <
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+ AssemblerPredicate<"FeatureGCN3Encoding">;
class vop {
field bits<9> SI3;
@@ -233,14 +242,88 @@ def FRAMEri32 : Operand<iPTR> {
let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
}
+def SoppBrTarget : AsmOperandClass {
+ let Name = "SoppBrTarget";
+ let ParserMethod = "parseSOppBrTarget";
+}
+
def sopp_brtarget : Operand<OtherVT> {
let EncoderMethod = "getSOPPBrEncoding";
let OperandType = "OPERAND_PCREL";
+ let ParserMatchClass = SoppBrTarget;
}
include "SIInstrFormats.td"
include "VIInstrFormats.td"
+def MubufOffsetMatchClass : AsmOperandClass {
+ let Name = "MubufOffset";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+class DSOffsetBaseMatchClass <string parser> : AsmOperandClass {
+ let Name = "DSOffset"#parser;
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isDSOffset";
+}
+
+def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">;
+def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">;
+
+def DSOffset01MatchClass : AsmOperandClass {
+ let Name = "DSOffset1";
+ let ParserMethod = "parseDSOff01OptionalOps";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isDSOffset01";
+}
+
+class GDSBaseMatchClass <string parser> : AsmOperandClass {
+ let Name = "GDS"#parser;
+ let PredicateMethod = "isImm";
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+}
+
+def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">;
+def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">;
+
+def GLCMatchClass : AsmOperandClass {
+ let Name = "GLC";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def SLCMatchClass : AsmOperandClass {
+ let Name = "SLC";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def TFEMatchClass : AsmOperandClass {
+ let Name = "TFE";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def OModMatchClass : AsmOperandClass {
+ let Name = "OMod";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseVOP3OptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def ClampMatchClass : AsmOperandClass {
+ let Name = "Clamp";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseVOP3OptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
let OperandType = "OPERAND_IMMEDIATE" in {
def offen : Operand<i1> {
@@ -254,35 +337,52 @@ def addr64 : Operand<i1> {
}
def mbuf_offset : Operand<i16> {
let PrintMethod = "printMBUFOffset";
+ let ParserMatchClass = MubufOffsetMatchClass;
}
-def ds_offset : Operand<i16> {
+class ds_offset_base <AsmOperandClass mc> : Operand<i16> {
let PrintMethod = "printDSOffset";
+ let ParserMatchClass = mc;
}
+def ds_offset : ds_offset_base <DSOffsetMatchClass>;
+def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>;
+
def ds_offset0 : Operand<i8> {
let PrintMethod = "printDSOffset0";
+ let ParserMatchClass = DSOffset01MatchClass;
}
def ds_offset1 : Operand<i8> {
let PrintMethod = "printDSOffset1";
+ let ParserMatchClass = DSOffset01MatchClass;
}
-def gds : Operand <i1> {
+class gds_base <AsmOperandClass mc> : Operand <i1> {
let PrintMethod = "printGDS";
+ let ParserMatchClass = mc;
}
+def gds : gds_base <GDSMatchClass>;
+
+def gds01 : gds_base <GDS01MatchClass>;
+
def glc : Operand <i1> {
let PrintMethod = "printGLC";
+ let ParserMatchClass = GLCMatchClass;
}
def slc : Operand <i1> {
let PrintMethod = "printSLC";
+ let ParserMatchClass = SLCMatchClass;
}
def tfe : Operand <i1> {
let PrintMethod = "printTFE";
+ let ParserMatchClass = TFEMatchClass;
}
def omod : Operand <i32> {
let PrintMethod = "printOModSI";
+ let ParserMatchClass = OModMatchClass;
}
def ClampMod : Operand <i1> {
let PrintMethod = "printClampSI";
+ let ParserMatchClass = ClampMatchClass;
}
} // End OperandType = "OPERAND_IMMEDIATE"
@@ -392,12 +492,18 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
SOP1 <outs, ins, asm, []>,
SOP1e <op.SI>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let isCodeGenOnly = 0;
+ let AssemblerPredicates = [isSICI];
+}
class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
SOP1 <outs, ins, asm, []>,
SOP1e <op.VI>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let isCodeGenOnly = 0;
+ let AssemblerPredicates = [isVI];
+}
multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm,
list<dag> pattern> {
@@ -473,12 +579,16 @@ class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
SOP2<outs, ins, asm, []>,
SOP2e<op.SI>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
SOP2<outs, ins, asm, []>,
SOP2e<op.VI>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
@@ -540,12 +650,28 @@ class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
SOPK <outs, ins, asm, []>,
SOPKe <op.SI>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ let isCodeGenOnly = 0;
+}
class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
SOPK <outs, ins, asm, []>,
SOPKe <op.VI>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ let isCodeGenOnly = 0;
+}
+
+multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm,
+ string asm = opName#opAsm> {
+ def "" : SOPK_Pseudo <opName, outs, ins, []>;
+
+ def _si : SOPK_Real_si <op, opName, outs, ins, asm>;
+
+ def _vi : SOPK_Real_vi <op, opName, outs, ins, asm>;
+
+}
multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0),
@@ -562,13 +688,39 @@ multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst),
(ins SReg_32:$src0, u16imm:$src1), pattern>;
- def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
+ let DisableEncoding = "$dst" in {
+ def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
- def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
+ def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
+ }
}
+multiclass SOPK_32TIE <sopk op, string opName, list<dag> pattern> : SOPK_m <
+ op, opName, (outs SReg_32:$sdst), (ins SReg_32:$src0, u16imm:$simm16),
+ " $sdst, $simm16"
+>;
+
+multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins,
+ string argAsm, string asm = opName#argAsm> {
+
+ def "" : SOPK_Pseudo <opName, outs, ins, []>;
+
+ def _si : SOPK <outs, ins, asm, []>,
+ SOPK64e <op.SI>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ let isCodeGenOnly = 0;
+ }
+
+ def _vi : SOPK <outs, ins, asm, []>,
+ SOPK64e <op.VI>,
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ let isCodeGenOnly = 0;
+ }
+}
//===----------------------------------------------------------------------===//
// SMRD classes
//===----------------------------------------------------------------------===//
@@ -584,13 +736,17 @@ class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
string asm> :
SMRD <outs, ins, asm, []>,
SMRDe <op, imm>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins,
string asm> :
SMRD <outs, ins, asm, []>,
SMEMe_vi <op, imm>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
string asm, list<dag> pattern> {
@@ -629,8 +785,14 @@ multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
let PrintMethod = "printOperandAndMods";
}
+
+def InputModsMatchClass : AsmOperandClass {
+ let Name = "RegWithInputMods";
+}
+
def InputModsNoDefault : Operand <i32> {
let PrintMethod = "printOperandAndMods";
+ let ParserMatchClass = InputModsMatchClass;
}
class getNumSrcArgs<ValueType Src1, ValueType Src2> {
@@ -838,7 +1000,8 @@ class AtomicNoRet <string noRetOp, bit isRet> {
class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP1Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr <opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr <opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -873,18 +1036,23 @@ multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP2Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
- VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>;
+ VOP2 <op.VI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, string revOp> {
@@ -930,7 +1098,8 @@ class VOP3DisableModFields <bit HasSrc0Mods,
class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP3Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e64", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e64", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e64", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -938,22 +1107,30 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3e <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI>;
+ SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI>;
+ SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3be <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI>;
+ SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3be_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI>;
+ SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, int NumSrcArgs, bit HasMods = 1> {
@@ -1095,12 +1272,16 @@ multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
}
def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
- SIMCInstr <opName, SISubtarget.SI>;
+ SIMCInstr <opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ }
def _vi : VOP3Common <outs, ins, asm, []>,
VOP3e_vi <op.VI3>,
VOP3DisableFields <1, 0, 0>,
- SIMCInstr <opName, SISubtarget.VI>;
+ SIMCInstr <opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ }
}
multiclass VOP1_Helper <vop1 op, string opName, dag outs,
@@ -1253,7 +1434,8 @@ let isCodeGenOnly = 0 in {
class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOPCCommon <ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -1504,7 +1686,9 @@ class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
DS <outs, ins, asm, []>,
DSe <op>,
- SIMCInstr <opName, SISubtarget.SI>;
+ SIMCInstr <opName, SISubtarget.SI> {
+ let isCodeGenOnly = 0;
+}
class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
DS <outs, ins, asm, []>,
@@ -1518,6 +1702,7 @@ class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm
bits<16> offset;
let offset0 = offset{7-0};
let offset1 = offset{15-8};
+ let isCodeGenOnly = 0;
}
class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
@@ -1545,12 +1730,12 @@ multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
dag outs = (outs rc:$vdst),
dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
- gds:$gds, M0Reg:$m0),
+ gds01:$gds, M0Reg:$m0),
string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
def "" : DS_Pseudo <opName, outs, ins, []>;
- let data0 = 0, data1 = 0 in {
+ let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in {
def _si : DS_Real_si <op, opName, outs, ins, asm>;
def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
@@ -1574,12 +1759,12 @@ multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
dag outs = (outs),
dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
- ds_offset0:$offset0, ds_offset1:$offset1, gds:$gds, M0Reg:$m0),
+ ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds, M0Reg:$m0),
string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
def "" : DS_Pseudo <opName, outs, ins, []>;
- let vdst = 0 in {
+ let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in {
def _si : DS_Real_si <op, opName, outs, ins, asm>;
def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
@@ -1653,7 +1838,7 @@ multiclass DS_0A_RET <bits<8> op, string opName,
multiclass DS_1A_RET_GDS <bits<8> op, string opName,
dag outs = (outs VGPR_32:$vdst),
- dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
+ dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset, M0Reg:$m0),
string asm = opName#" $vdst, $addr"#"$offset gds"> {
def "" : DS_Pseudo <opName, outs, ins, []>;
@@ -1762,6 +1947,20 @@ class mubuf <bits<7> si, bits<7> vi = si> {
field bits<7> VI = vi;
}
+let isCodeGenOnly = 0 in {
+
+class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
+ let lds = 0;
+}
+
+} // End let isCodeGenOnly = 0
+
+class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> {
+ let lds = 0;
+}
+
class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
bit IsAddr64 = is_addr64;
string OpName = NAME # suffix;
@@ -1805,7 +2004,7 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
MUBUFAddr64Table <0>;
- let addr64 = 0 in {
+ let addr64 = 0, isCodeGenOnly = 0 in {
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
}
@@ -1818,7 +2017,7 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
MUBUFAddr64Table <1>;
- let addr64 = 1 in {
+ let addr64 = 1, isCodeGenOnly = 0 in {
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
}
@@ -1826,11 +2025,6 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
// for VI appropriately.
}
-class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
- let lds = 0;
-}
-
multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins,
string asm, list<dag> pattern, bit is_return> {
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 95b2470..91e8c8c 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -27,18 +27,10 @@ def SendMsgImm : Operand<i32> {
}
def isGCN : Predicate<"Subtarget->getGeneration() "
- ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+ ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ AssemblerPredicate<"FeatureGCN">;
def isSI : Predicate<"Subtarget->getGeneration() "
"== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
-def isSICI : Predicate<
- "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
- "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
->;
-def isCI : Predicate<"Subtarget->getGeneration() "
- ">= AMDGPUSubtarget::SEA_ISLANDS">;
-def isVI : Predicate <
- "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS"
->;
def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
@@ -242,9 +234,9 @@ defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
>;
} // End Defs = [SCC]
-defm S_CSELECT_B32 : SOP2_SELECT_32 <sop2<0x0a>, "s_cselect_b32", []>;
let Uses = [SCC] in {
+ defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>;
defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>;
} // End Uses = [SCC]
@@ -387,6 +379,7 @@ defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32",
>;
*/
+defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>;
defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>;
defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>;
defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>;
@@ -400,18 +393,27 @@ defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>;
defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>;
} // End isCompare = 1
-let isCommutable = 1 in {
- let Defs = [SCC], isCommutable = 1 in {
- defm S_ADDK_I32 : SOPK_32 <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
- }
- defm S_MULK_I32 : SOPK_32 <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
+let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
+ Constraints = "$sdst = $src0" in {
+ defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
+ defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
}
-//defm S_CBRANCH_I_FORK : SOPK_ <sopk<0x11, 0x10>, "s_cbranch_i_fork", []>;
+defm S_CBRANCH_I_FORK : SOPK_m <
+ sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs),
+ (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16"
+>;
defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>;
-defm S_SETREG_B32 : SOPK_32 <sopk<0x13, 0x12>, "s_setreg_b32", []>;
-defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
-//defm S_SETREG_IMM32_B32 : SOPK_32 <sopk<0x15, 0x14>, "s_setreg_imm32_b32", []>;
+defm S_SETREG_B32 : SOPK_m <
+ sopk<0x13, 0x12>, "s_setreg_b32", (outs),
+ (ins SReg_32:$sdst, u16imm:$simm16), " $sdst, $simm16"
+>;
+// FIXME: Not on SI?
+//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
+defm S_SETREG_IMM32_B32 : SOPK_IMM32 <
+ sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs),
+ (ins i32imm:$imm, u16imm:$simm16), " $imm, $simm16"
+>;
//===----------------------------------------------------------------------===//
// SOPP Instructions
@@ -1630,7 +1632,6 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
>;
-
defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
VOP_I32_F32_I32>; // TODO: set "Uses = dst"
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 7bb5dc2..f289014 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -66,7 +66,7 @@ foreach Index = 0-255 in {
//===----------------------------------------------------------------------===//
// SGPR 32-bit registers
-def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add (sequence "SGPR%u", 0, 101))>;
// SGPR 64-bit registers
@@ -113,7 +113,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
(add (decimate (shl SGPR_32, 15), 4))]>;
// VGPR 32-bit registers
-def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add (sequence "VGPR%u", 0, 255))>;
// VGPR 64-bit registers
@@ -169,6 +169,11 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
// Register classes used as source and destination
//===----------------------------------------------------------------------===//
+class RegImmMatcher<string name> : AsmOperandClass {
+ let Name = name;
+ let RenderMethod = "addRegOrImmOperands";
+}
+
// Special register classes for predicates and the M0 register
def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
let CopyCost = -1; // Theoretically it is possible to read from SCC,
@@ -180,7 +185,7 @@ def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
>;
@@ -227,15 +232,21 @@ class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
// SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
-def SSrc_32 : RegImmOperand<SReg_32>;
+def SSrc_32 : RegImmOperand<SReg_32> {
+ let ParserMatchClass = RegImmMatcher<"SSrc32">;
+}
-def SSrc_64 : RegImmOperand<SReg_64>;
+def SSrc_64 : RegImmOperand<SReg_64> {
+ let ParserMatchClass = RegImmMatcher<"SSrc64">;
+}
//===----------------------------------------------------------------------===//
// SCSrc_* Operands with an SGPR or a inline constant
//===----------------------------------------------------------------------===//
-def SCSrc_32 : RegInlineOperand<SReg_32>;
+def SCSrc_32 : RegInlineOperand<SReg_32> {
+ let ParserMatchClass = RegImmMatcher<"SCSrc32">;
+}
//===----------------------------------------------------------------------===//
// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
@@ -245,14 +256,30 @@ def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
-def VSrc_32 : RegImmOperand<VS_32>;
+def VSrc_32 : RegisterOperand<VS_32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc32">;
+}
-def VSrc_64 : RegImmOperand<VS_64>;
+def VSrc_64 : RegisterOperand<VS_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc64">;
+}
//===----------------------------------------------------------------------===//
// VCSrc_* Operands with an SGPR, VGPR or an inline constant
//===----------------------------------------------------------------------===//
-def VCSrc_32 : RegInlineOperand<VS_32>;
+def VCSrc_32 : RegisterOperand<VS_32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc32">;
+}
-def VCSrc_64 : RegInlineOperand<VS_64>;
+def VCSrc_64 : RegisterOperand<VS_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc64">;
+}
diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp
index 27bbf4f..591ce85 100644
--- a/lib/Target/R600/SITypeRewriter.cpp
+++ b/lib/Target/R600/SITypeRewriter.cpp
@@ -104,7 +104,7 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
SmallVector <Type*, 8> Types;
bool NeedToReplace = false;
Function *F = I.getCalledFunction();
- std::string Name = F->getName().str();
+ std::string Name = F->getName();
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
Value *Arg = I.getArgOperand(i);
if (Arg->getType() == v16i8) {
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index 5975a51..b6eebb0 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -34,7 +34,7 @@ namespace Sparc {
#define PRINT_ALIAS_INSTR
#include "SparcGenAsmWriter.inc"
-bool SparcInstPrinter::isV9() const {
+bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const {
return (STI.getFeatureBits() & Sparc::FeatureV9) != 0;
}
@@ -44,15 +44,15 @@ void SparcInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const
}
void SparcInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot)
-{
- if (!printAliasInstr(MI, O) && !printSparcAliasInstr(MI, O))
- printInstruction(MI, O);
+ StringRef Annot, const MCSubtargetInfo &STI) {
+ if (!printAliasInstr(MI, STI, O) && !printSparcAliasInstr(MI, STI, O))
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
}
-bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O)
-{
+bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
switch (MI->getOpcode()) {
default: return false;
case SP::JMPLrr:
@@ -72,16 +72,16 @@ bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O)
case SP::O7: O << "\tretl"; return true;
}
}
- O << "\tjmp "; printMemOperand(MI, 1, O);
+ O << "\tjmp "; printMemOperand(MI, 1, STI, O);
return true;
case SP::O7: // call $addr
- O << "\tcall "; printMemOperand(MI, 1, O);
+ O << "\tcall "; printMemOperand(MI, 1, STI, O);
return true;
}
}
case SP::V9FCMPS: case SP::V9FCMPD: case SP::V9FCMPQ:
case SP::V9FCMPES: case SP::V9FCMPED: case SP::V9FCMPEQ: {
- if (isV9()
+ if (isV9(STI)
|| (MI->getNumOperands() != 3)
|| (!MI->getOperand(0).isReg())
|| (MI->getOperand(0).getReg() != SP::FCC0))
@@ -96,17 +96,17 @@ bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O)
case SP::V9FCMPED: O << "\tfcmped "; break;
case SP::V9FCMPEQ: O << "\tfcmpeq "; break;
}
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ", ";
- printOperand(MI, 2, O);
+ printOperand(MI, 2, STI, O);
return true;
}
}
}
void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
- raw_ostream &O)
-{
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand (opNum);
if (MO.isReg()) {
@@ -124,14 +124,14 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
}
void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum,
- raw_ostream &O, const char *Modifier)
-{
- printOperand(MI, opNum, O);
+ const MCSubtargetInfo &STI,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, opNum, STI, O);
// If this is an ADD operand, emit it like normal operands.
if (Modifier && !strcmp(Modifier, "arith")) {
O << ", ";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum+1, STI, O);
return;
}
const MCOperand &MO = MI->getOperand(opNum+1);
@@ -143,12 +143,12 @@ void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum,
O << "+";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum+1, STI, O);
}
void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
- raw_ostream &O)
-{
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
int CC = (int)MI->getOperand(opNum).getImm();
switch (MI->getOpcode()) {
default: break;
@@ -171,8 +171,8 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
}
bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum,
- raw_ostream &O)
-{
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX.");
return true;
}
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index c96d5ad..0b01b88 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -22,32 +22,36 @@ namespace llvm {
class MCOperand;
class SparcInstPrinter : public MCInstPrinter {
- const MCSubtargetInfo &STI;
public:
- SparcInstPrinter(const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &sti)
- : MCInstPrinter(MAI, MII, MRI), STI(sti) {}
+ SparcInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
- bool printSparcAliasInstr(const MCInst *MI, raw_ostream &OS);
- bool isV9() const;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ bool printSparcAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+ bool isV9(const MCSubtargetInfo &STI) const;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
- bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- void printOperand(const MCInst *MI, int opNum, raw_ostream &OS);
- void printMemOperand(const MCInst *MI, int opNum, raw_ostream &OS,
- const char *Modifier = nullptr);
- void printCCOperand(const MCInst *MI, int opNum, raw_ostream &OS);
- bool printGetPCX(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
-
+ void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+ void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS, const char *Modifier = nullptr);
+ void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+ bool printGetPCX(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index dcd81e3..4abb6b8 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -244,7 +244,7 @@ namespace {
}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType);
return createSparcELFObjectWriter(OS, is64Bit(), OSABI);
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index 5ba82f1..98ba7e6 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -26,7 +26,8 @@ namespace {
Is64Bit ? ELF::EM_SPARCV9 : ELF::EM_SPARC,
/*HasRelocationAddend*/ true) {}
- virtual ~SparcELFObjectWriter() {}
+ ~SparcELFObjectWriter() override {}
+
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
@@ -104,9 +105,8 @@ unsigned SparcELFObjectWriter::GetRelocType(const MCValue &Target,
return ELF::R_SPARC_NONE;
}
-MCObjectWriter *llvm::createSparcELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint8_t OSABI) {
+MCObjectWriter *llvm::createSparcELFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint8_t OSABI) {
MCELFObjectTargetWriter *MOTW = new SparcELFObjectWriter(Is64Bit, OSABI);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 598856f..b447ab3 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -38,7 +38,7 @@ class SparcMCCodeEmitter : public MCCodeEmitter {
public:
SparcMCCodeEmitter(MCContext &ctx): Ctx(ctx) {}
- ~SparcMCCodeEmitter() {}
+ ~SparcMCCodeEmitter() override {}
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 630ed1b..7895404 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -134,13 +134,12 @@ static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
return new SparcTargetAsmStreamer(S, OS);
}
-static MCInstPrinter *createSparcMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- return new SparcInstPrinter(MAI, MII, MRI, STI);
+static MCInstPrinter *createSparcMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ return new SparcInstPrinter(MAI, MII, MRI);
}
extern "C" void LLVMInitializeSparcTargetMC() {
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index d2ec991..5f38b12 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -26,6 +26,7 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
+class raw_pwrite_stream;
class raw_ostream;
extern Target TheSparcTarget;
@@ -38,8 +39,7 @@ MCAsmBackend *createSparcAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
StringRef TT,
StringRef CPU);
-MCObjectWriter *createSparcELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
+MCObjectWriter *createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint8_t OSABI);
} // End llvm namespace
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index 3159a46..c34122e 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -92,8 +92,15 @@ def : Proc<"niagara4", [FeatureV9, FeatureV8Deprecated, UsePopc,
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
+def SparcAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
+ int Variant = 0;
+}
+
def Sparc : Target {
// Pull in Instruction Info:
let InstructionSet = SparcInstrInfo;
let AssemblyParsers = [SparcAsmParser];
+ let AssemblyWriters = [SparcAsmWriter];
}
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 0439f9d..56290e2 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -58,7 +58,6 @@ namespace {
void EmitFunctionBodyStart() override;
void EmitInstruction(const MachineInstr *MI) override;
- void EmitEndOfAsmFile(Module &M) override;
static const char *getRegisterName(unsigned RegNo) {
return SparcInstPrinter::getRegisterName(RegNo);
@@ -442,23 +441,6 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void SparcAsmPrinter::EmitEndOfAsmFile(Module &M) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Generate stubs for global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataSection());
- unsigned PtrSize = TM.getDataLayout()->getPointerSize(0);
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), PtrSize);
- }
- }
-}
-
// Force static initialization.
extern "C" void LLVMInitializeSparcAsmPrinter() {
RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index c8b0570..5b964af 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -727,7 +727,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
false, // isVolatile,
- (Size <= 32), // AlwaysInline if size <= 32
+ (Size <= 32), // AlwaysInline if size <= 32,
+ false, // isTailCall
MachinePointerInfo(), MachinePointerInfo());
ByValArgs.push_back(FIPtr);
}
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
index a3a21d6..6818291 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -23,7 +23,7 @@ class SparcTargetMachine;
class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
explicit SparcSelectionDAGInfo(const DataLayout &DL);
- ~SparcSelectionDAGInfo();
+ ~SparcSelectionDAGInfo() override;
};
}
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 60a3912..336f037 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -29,6 +29,7 @@ add_llvm_target(SystemZCodeGen
SystemZShortenInst.cpp
SystemZSubtarget.cpp
SystemZTargetMachine.cpp
+ SystemZTargetTransformInfo.cpp
)
add_subdirectory(AsmParser)
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 23173bf..84400f8 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -25,7 +25,7 @@ class SystemZDisassembler : public MCDisassembler {
public:
SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- virtual ~SystemZDisassembler() {}
+ ~SystemZDisassembler() override {}
DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 996a492..cf1ee54 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -43,7 +43,8 @@ void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) {
}
void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
printInstruction(MI, O);
printAnnotation(O, Annot);
}
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 732e5fa..6f56c7b 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -39,7 +39,8 @@ public:
// Override MCInstPrinter.
void printRegName(raw_ostream &O, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
private:
// Print various types of operand.
diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt
index 542aaee..6f8431d 100644
--- a/lib/Target/SystemZ/LLVMBuild.txt
+++ b/lib/Target/SystemZ/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = SystemZCodeGen
parent = SystemZ
-required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index b79b1d8..1c3887a 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -62,7 +62,7 @@ public:
llvm_unreachable("SystemZ does do not have assembler relaxation");
}
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createSystemZObjectWriter(OS, OSABI);
}
};
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index 40dc48e..8dd70b9 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -32,7 +32,7 @@ public:
: MCII(mcii), Ctx(ctx) {
}
- ~SystemZMCCodeEmitter() {}
+ ~SystemZMCCodeEmitter() override {}
// OVerride MCCodeEmitter.
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 2632518..ee1af02 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -20,7 +20,7 @@ class SystemZObjectWriter : public MCELFObjectTargetWriter {
public:
SystemZObjectWriter(uint8_t OSABI);
- virtual ~SystemZObjectWriter();
+ ~SystemZObjectWriter() override;
protected:
// Override MCELFObjectTargetWriter.
@@ -152,7 +152,7 @@ unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target,
}
}
-MCObjectWriter *llvm::createSystemZObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI) {
MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index ffd05a9..ea56fb1 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -172,12 +172,11 @@ static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createSystemZMCInstPrinter(const Target &T,
+static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new SystemZInstPrinter(MAI, MII, MRI);
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 962c950..2b2647b 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -23,6 +23,7 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Target;
+class raw_pwrite_stream;
class raw_ostream;
extern Target TheSystemZTarget;
@@ -77,7 +78,7 @@ MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI);
+MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
} // end namespace llvm
// Defines symbolic names for SystemZ registers.
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index 5f17edb..b3a7310 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -68,6 +68,18 @@ const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1;
const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3;
const unsigned CCMASK_TM = CCMASK_ANY;
+// Condition-code mask assignments for TRANSACTION_BEGIN.
+const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0;
+const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1;
+const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2;
+const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3;
+const unsigned CCMASK_TBEGIN = CCMASK_ANY;
+
+// Condition-code mask assignments for TRANSACTION_END.
+const unsigned CCMASK_TEND_TX = CCMASK_0;
+const unsigned CCMASK_TEND_NOTX = CCMASK_2;
+const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX;
+
// The position of the low CC bit in an IPM result.
const unsigned IPM_CC = 28;
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 18e37e3..2524733 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -255,29 +255,6 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Triple(TM.getTargetTriple()).isOSBinFormatELF()) {
- auto &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(0));
- }
- Stubs.clear();
- }
- }
-}
-
// Force static initialization.
extern "C" void LLVMInitializeSystemZAsmPrinter() {
RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h
index a4d5b78..7f6e823 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -38,7 +38,6 @@ public:
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &OS) override;
- void EmitEndOfAsmFile(Module &M) override;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index a52aa25..1a58b53 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -898,6 +898,9 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
}
unsigned Opcode = SystemZ::RISBG;
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (Subtarget->hasMiscellaneousExtensions())
+ Opcode = SystemZ::RISBGN;
EVT OpcodeVT = MVT::i64;
if (VT == MVT::i32 && Subtarget->hasHighWord()) {
Opcode = SystemZ::RISBMux;
@@ -945,9 +948,13 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
// See whether we can avoid an AND in the first operand by converting
// ROSBG to RISBG.
- if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask))
+ if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) {
Opcode = SystemZ::RISBG;
-
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (Subtarget->hasMiscellaneousExtensions())
+ Opcode = SystemZ::RISBGN;
+ }
+
EVT VT = N->getValueType(0);
SDValue Ops[5] = {
convertTo(SDLoc(N), MVT::i64, Op0),
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 0ca8bcd..21882cb 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Intrinsics.h"
#include <cctype>
using namespace llvm;
@@ -163,8 +164,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
// available, or if the operand is constant.
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ // Use POPCNT on z196 and above.
+ if (Subtarget.hasPopulationCount())
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ else
+ setOperationAction(ISD::CTPOP, VT, Expand);
+
// No special instructions for these.
- setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
@@ -299,6 +305,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
// Codes for which we want to perform some z-specific combinations.
setTargetDAGCombine(ISD::SIGN_EXTEND);
+ // Handle intrinsics.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+
// We want to use MVC in preference to even a single load/store pair.
MaxStoresPerMemcpy = 0;
MaxStoresPerMemcpyOptSize = 0;
@@ -342,6 +351,16 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return Imm.isZero() || Imm.isNegZero();
}
+bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ // We can use CGFI or CLGFI.
+ return isInt<32>(Imm) || isUInt<32>(Imm);
+}
+
+bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ // We can use ALGFI or SLGFI.
+ return isUInt<32>(Imm) || isUInt<32>(-Imm);
+}
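
These two hooks describe exactly the immediate ranges that CGFI/CLGFI (compare) and ALGFI/SLGFI (add/subtract logical) accept. A minimal sketch of the resulting legality checks, with isInt<32>/isUInt<32> modelled by hand:

    // Hedged model of the two hooks above (illustration only).
    #include <cassert>
    #include <cstdint>

    static bool isInt32(int64_t V)  { return V >= INT32_MIN && V <= INT32_MAX; }
    static bool isUInt32(int64_t V) { return V >= 0 && V <= INT64_C(0xffffffff); }

    static bool legalICmpImm(int64_t Imm) { return isInt32(Imm) || isUInt32(Imm); }
    static bool legalAddImm(int64_t Imm)  { return isUInt32(Imm) || isUInt32(-Imm); }

    int main() {
      assert(legalICmpImm(-1));                    // CGFI: signed 32-bit
      assert(legalICmpImm(INT64_C(0xffffffff)));   // CLGFI: unsigned 32-bit
      assert(!legalICmpImm(INT64_C(1) << 32));     // needs a separate constant load
      assert(legalAddImm(INT64_C(0xffffffff)));    // ALGFI
      assert(legalAddImm(-INT64_C(0xffffffff)));   // SLGFI of the negated value
      assert(!legalAddImm(INT64_C(1) << 32));
      return 0;
    }
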
+
bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
@@ -1016,6 +1035,53 @@ prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
}
+// Return true if Op is an intrinsic node with chain that returns the CC value
+// as its only (other) result. Provide the associated SystemZISD opcode and
+// the mask of valid CC values if so.
+static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
+ unsigned &CCValid) {
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_tbegin:
+ Opcode = SystemZISD::TBEGIN;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tbegin_nofloat:
+ Opcode = SystemZISD::TBEGIN_NOFLOAT;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tend:
+ Opcode = SystemZISD::TEND;
+ CCValid = SystemZ::CCMASK_TEND;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+// Emit an intrinsic with chain with a glued value instead of its CC result.
+static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
+ unsigned Opcode) {
+ // Copy all operands except the intrinsic ID.
+ unsigned NumOps = Op.getNumOperands();
+ SmallVector<SDValue, 6> Ops;
+ Ops.reserve(NumOps - 1);
+ Ops.push_back(Op.getOperand(0));
+ for (unsigned I = 2; I < NumOps; ++I)
+ Ops.push_back(Op.getOperand(I));
+
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
+ SDValue OldChain = SDValue(Op.getNode(), 1);
+ SDValue NewChain = SDValue(Intr.getNode(), 0);
+ DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
+ return Intr;
+}
+
// CC is a comparison that will be implemented using an integer or
// floating-point comparison. Return the condition code mask for
// a branch on true. In the integer case, CCMASK_CMP_UO is set for
@@ -1530,6 +1596,8 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
MaskVal = -(CmpVal & -CmpVal);
NewC.ICmpType = SystemZICMP::UnsignedOnly;
}
+ if (!MaskVal)
+ return;
// Check whether the combination of mask, comparison value and comparison
// type are suitable.
@@ -1571,9 +1639,53 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
C.CCMask = NewCCMask;
}
+// Return a Comparison that tests the condition-code result of intrinsic
+// node Call against constant integer CC using comparison code Cond.
+// Opcode is the opcode of the SystemZISD operation for the intrinsic
+// and CCValid is the set of possible condition-code results.
+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
+ SDValue Call, unsigned CCValid, uint64_t CC,
+ ISD::CondCode Cond) {
+ Comparison C(Call, SDValue());
+ C.Opcode = Opcode;
+ C.CCValid = CCValid;
+ if (Cond == ISD::SETEQ)
+ // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
+ C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
+ else if (Cond == ISD::SETNE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
+ else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
+ // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? -1 << (4 - CC) : -1;
+ else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0;
+ else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
+ // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? -1 << (3 - CC) : -1;
+ else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0;
+ else
+ llvm_unreachable("Unexpected integer comparison type");
+ C.CCMask &= CCValid;
+ return C;
+}
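
The mask construction above is terse; as a worked example (assuming CC values 0-3 and the bit-(3 - CC) encoding used throughout this backend), testing a CC result with SETEQ against 0 should select only the "CC == 0" bit, and SETLT against 2 the bits for CC 0 and 1. A minimal sketch:

    // Hedged model of the SETEQ/SETNE/SETLT mask formulas above.
    #include <cassert>
    #include <cstdint>

    static unsigned maskEQ(uint64_t CC) { return CC < 4 ? 1u << (3 - CC) : 0; }
    static unsigned maskNE(uint64_t CC) { return CC < 4 ? ~(1u << (3 - CC)) : ~0u; }
    static unsigned maskLT(uint64_t CC) { return CC < 4 ? ~0u << (4 - CC) : ~0u; }

    int main() {
      const unsigned CCValid = 0xf;           // e.g. CCMASK_TBEGIN: all four CCs
      assert((maskEQ(0) & CCValid) == 0x8);   // CC==0 -> bit 3
      assert((maskNE(0) & CCValid) == 0x7);   // everything except CC==0
      assert((maskLT(2) & CCValid) == 0xc);   // CC<2 -> bits for CC 0 and 1
      assert((maskEQ(5) & CCValid) == 0x0);   // CC>3 can never match
      return 0;
    }
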
+
// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
ISD::CondCode Cond) {
+ if (CmpOp1.getOpcode() == ISD::Constant) {
+ uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
+ unsigned Opcode, CCValid;
+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
+ isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ }
Comparison C(CmpOp0, CmpOp1);
C.CCMask = CCMaskForCondCode(Cond);
if (C.Op0.getValueType().isFloatingPoint()) {
@@ -1615,6 +1727,17 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+ if (!C.Op1.getNode()) {
+ SDValue Op;
+ switch (C.Op0.getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
+ break;
+ default:
+ llvm_unreachable("Invalid comparison operands");
+ }
+ return SDValue(Op.getNode(), Op->getNumValues() - 1);
+ }
if (C.Opcode == SystemZISD::ICMP)
return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
DAG.getConstant(C.ICmpType, MVT::i32));
@@ -1696,7 +1819,6 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
}
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue CmpOp0 = Op.getOperand(2);
SDValue CmpOp1 = Op.getOperand(3);
@@ -1706,7 +1828,7 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
SDValue Glue = emitCmp(DAG, DL, C);
return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
- Chain, DAG.getConstant(C.CCValid, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(C.CCValid, MVT::i32),
DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue);
}
@@ -2100,6 +2222,7 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32),
/*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
+ /*isTailCall*/false,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
@@ -2292,6 +2415,46 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
MVT::i64, HighOp, Low32);
}
+SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int64_t OrigBitSize = VT.getSizeInBits();
+ SDLoc DL(Op);
+
+ // Get the known-zero mask for the operand.
+ Op = Op.getOperand(0);
+ APInt KnownZero, KnownOne;
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
+ unsigned NumSignificantBits = (~KnownZero).getActiveBits();
+ if (NumSignificantBits == 0)
+ return DAG.getConstant(0, VT);
+
+ // Skip known-zero high parts of the operand.
+ int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
+ BitSize = std::min(BitSize, OrigBitSize);
+
+ // The POPCNT instruction counts the number of bits in each byte.
+ Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
+ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+
+ // Add up per-byte counts in a binary tree. All bits of Op at
+ // positions larger than BitSize remain zero throughout.
+ for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
+ SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, VT));
+ if (BitSize != OrigBitSize)
+ Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
+ DAG.getConstant(((uint64_t)1 << BitSize) - 1, VT));
+ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
+ }
+
+ // Extract overall result from high byte.
+ if (BitSize > 8)
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, VT));
+
+ return Op;
+}
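
The SHL/ADD tree above is easier to follow as plain scalar code. The following sketch models only the 64-bit case (BitSize == OrigBitSize == 64, so the AND with the BitSize mask never fires): per-byte popcounts stand in for SystemZISD::POPCNT, and the same shift amounts 32, 16 and 8 accumulate the total into the top byte before the final shift by BitSize - 8.

    // Hedged scalar model of the 64-bit CTPOP lowering above.
    #include <cassert>
    #include <cstdint>

    static uint64_t modelCtpop64(uint64_t X) {
      // SystemZISD::POPCNT: number of set bits in each byte, kept per byte.
      uint64_t PerByte = 0;
      for (int B = 0; B < 8; ++B) {
        uint64_t Byte = (X >> (8 * B)) & 0xff;
        unsigned Count = 0;
        for (; Byte; Byte &= Byte - 1)
          ++Count;
        PerByte |= (uint64_t)Count << (8 * B);
      }
      // The SHL/ADD tree: after shifts by 32, 16 and 8 the top byte holds the sum.
      uint64_t Op = PerByte;
      for (int I = 32; I >= 8; I /= 2)
        Op += Op << I;
      // Extract the overall result from the high byte (SRL by BitSize - 8).
      return Op >> 56;
    }

    int main() {
      assert(modelCtpop64(0) == 0);
      assert(modelCtpop64(~UINT64_C(0)) == 64);
      assert(modelCtpop64(UINT64_C(0x8000000000000001)) == 2);
      return 0;
    }
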
+
// Op is an atomic load. Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SelectionDAG &DAG) const {
@@ -2505,6 +2668,30 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
Node->getMemoryVT(), Node->getMemOperand());
}
+// Return an i32 that contains the value of CC immediately after After,
+// whose final operand must be MVT::Glue.
+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
+ SDValue Glue = SDValue(After, After->getNumValues() - 1);
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, SDLoc(After), MVT::i32, Glue);
+ return DAG.getNode(ISD::SRL, SDLoc(After), MVT::i32, IPM,
+ DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
+}
+
+SDValue
+SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opcode, CCValid;
+ if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
+ SDValue CC = getCCResult(DAG, Glued.getNode());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
+ return SDValue();
+ }
+
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -2542,6 +2729,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerUDIVREM(Op, DAG);
case ISD::OR:
return lowerOR(Op, DAG);
+ case ISD::CTPOP:
+ return lowerCTPOP(Op, DAG);
case ISD::ATOMIC_SWAP:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
@@ -2576,6 +2765,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSTACKRESTORE(Op, DAG);
case ISD::PREFETCH:
return lowerPREFETCH(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN:
+ return lowerINTRINSIC_W_CHAIN(Op, DAG);
default:
llvm_unreachable("Unexpected node to lower");
}
@@ -2616,6 +2807,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(SEARCH_STRING);
OPCODE(IPM);
OPCODE(SERIALIZE);
+ OPCODE(TBEGIN);
+ OPCODE(TBEGIN_NOFLOAT);
+ OPCODE(TEND);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
@@ -3443,6 +3637,50 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
return DoneMBB;
}
+// Update TBEGIN instruction with final opcode and register clobbers.
+MachineBasicBlock *
+SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode,
+ bool NoFloat) const {
+ MachineFunction &MF = *MBB->getParent();
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
+
+ // Update opcode.
+ MI->setDesc(TII->get(Opcode));
+
+ // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
+ // Make sure to add the corresponding GRSM bits if they are missing.
+ uint64_t Control = MI->getOperand(2).getImm();
+ static const unsigned GPRControlBit[16] = {
+ 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
+ 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
+ };
+ Control |= GPRControlBit[15];
+ if (TFI->hasFP(MF))
+ Control |= GPRControlBit[11];
+ MI->getOperand(2).setImm(Control);
+
+ // Add GPR clobbers.
+ for (int I = 0; I < 16; I++) {
+ if ((Control & GPRControlBit[I]) == 0) {
+ unsigned Reg = SystemZMC::GR64Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ // Add FPR clobbers.
+ if (!NoFloat && (Control & 4) != 0) {
+ for (int I = 0; I < 16; I++) {
+ unsigned Reg = SystemZMC::FP64Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ return MBB;
+}
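
GPRControlBit[] holds one general-register-save-mask (GRSM) bit per even/odd register pair, which is why consecutive entries repeat; index 15 (the stack pointer's pair) is always forced on, and index 11 when a frame pointer is in use. A minimal sketch of the indexing and of the clobber decision, assuming pair (2I, 2I+1) is controlled by bit 0x8000 >> I as the table implies:

    // Hedged sketch of the GRSM bookkeeping above (illustration only).
    #include <cassert>
    #include <cstdint>

    static unsigned gprControlBit(unsigned Reg) { return 0x8000u >> (Reg / 2); }

    int main() {
      assert(gprControlBit(15) == 0x0100);   // %r15 pair: always forced on
      assert(gprControlBit(11) == 0x0400);   // %r11 pair: forced on with a frame pointer
      // A pair whose GRSM bit stays clear is not restored if the transaction
      // aborts, so the expansion above marks those registers as implicit defs.
      uint64_t Control = 0x0f00 | gprControlBit(15);
      bool R4Clobbered = (Control & gprControlBit(4)) == 0;
      assert(R4Clobbered);
      return 0;
    }
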
+
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
@@ -3684,6 +3922,12 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
return emitStringWrapper(MI, MBB, SystemZ::MVST);
case SystemZ::SRSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::SRST);
+ case SystemZ::TBEGIN:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
+ case SystemZ::TBEGIN_nofloat:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
+ case SystemZ::TBEGINC:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
default:
llvm_unreachable("Unexpected instr type to insert");
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 23c62c9..56d7ef4 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -87,6 +87,9 @@ enum {
// the number of the register.
EXTRACT_ACCESS,
+ // Count number of bits set in operand 0 per byte.
+ POPCNT,
+
// Wrappers around the ISD opcodes of the same name. The output and
// first input operands are GR128s. The trailing numbers are the
// widths of the second operand in bits.
@@ -143,6 +146,15 @@ enum {
// Perform a serialization operation. (BCR 15,0 or BCR 14,0.)
SERIALIZE,
+ // Transaction begin. The first operand is the chain, the second
+ // the TDB pointer, and the third the immediate control field.
+ // Returns chain and glue.
+ TBEGIN,
+ TBEGIN_NOFLOAT,
+
+ // Transaction end. Just the chain operand. Returns chain and glue.
+ TEND,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@@ -213,6 +225,8 @@ public:
EVT getSetCCResultType(LLVMContext &, EVT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+ bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
@@ -302,6 +316,7 @@ private:
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
@@ -312,6 +327,7 @@ private:
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
@@ -349,6 +365,10 @@ private:
MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Opcode) const;
+ MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode,
+ bool NoFloat) const;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 9f59a1c..2d3c9e2 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -473,6 +473,17 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{15-0} = BD2;
}
+class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<16> BD2;
+
+ let Inst{31-16} = op;
+ let Inst{15-0} = BD2;
+}
+
//===----------------------------------------------------------------------===//
// Instruction definitions with semantics
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 5128993..3a02859 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -723,9 +723,12 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Start, End;
if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
unsigned NewOpcode;
- if (And.RegSize == 64)
+ if (And.RegSize == 64) {
NewOpcode = SystemZ::RISBG;
- else {
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (STI.hasMiscellaneousExtensions())
+ NewOpcode = SystemZ::RISBGN;
+ } else {
NewOpcode = SystemZ::RISBMux;
Start &= 31;
End &= 31;
@@ -1146,17 +1149,22 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
switch (Opcode) {
- case SystemZ::L: return SystemZ::LT;
- case SystemZ::LY: return SystemZ::LT;
- case SystemZ::LG: return SystemZ::LTG;
- case SystemZ::LGF: return SystemZ::LTGF;
- case SystemZ::LR: return SystemZ::LTR;
- case SystemZ::LGFR: return SystemZ::LTGFR;
- case SystemZ::LGR: return SystemZ::LTGR;
- case SystemZ::LER: return SystemZ::LTEBR;
- case SystemZ::LDR: return SystemZ::LTDBR;
- case SystemZ::LXR: return SystemZ::LTXBR;
- default: return 0;
+ case SystemZ::L: return SystemZ::LT;
+ case SystemZ::LY: return SystemZ::LT;
+ case SystemZ::LG: return SystemZ::LTG;
+ case SystemZ::LGF: return SystemZ::LTGF;
+ case SystemZ::LR: return SystemZ::LTR;
+ case SystemZ::LGFR: return SystemZ::LTGFR;
+ case SystemZ::LGR: return SystemZ::LTGR;
+ case SystemZ::LER: return SystemZ::LTEBR;
+ case SystemZ::LDR: return SystemZ::LTDBR;
+ case SystemZ::LXR: return SystemZ::LTXBR;
+ // On zEC12 we prefer to use RISBGN. But if there is a chance to
+ // actually use the condition code, we may turn it back into RISBG.
+ // Note that RISBG is not really a "load-and-test" instruction,
+ // but sets the same condition code values, so is OK to use here.
+ case SystemZ::RISBGN: return SystemZ::RISBG;
+ default: return 0;
}
}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index a7f7747..820f30b 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1061,6 +1061,10 @@ let Defs = [CC] in {
def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
}
+// On zEC12 we have a variant of RISBG that does not set CC.
+let Predicates = [FeatureMiscellaneousExtensions] in
+ def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>;
+
// Forms of RISBG that only affect one word of the destination register.
// They do not set CC.
let Predicates = [FeatureHighWord] in {
@@ -1358,6 +1362,60 @@ let Defs = [CC] in {
}
//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureTransactionalExecution] in {
+ // Transaction Begin
+ let hasSideEffects = 1, mayStore = 1,
+ usesCustomInserter = 1, Defs = [CC] in {
+ def TBEGIN : InstSIL<0xE560,
+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ "tbegin\t$BD1, $I2",
+ [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>;
+ def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ [(z_tbegin_nofloat bdaddr12only:$BD1,
+ imm32zx16:$I2)]>;
+ def TBEGINC : InstSIL<0xE561,
+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ "tbeginc\t$BD1, $I2",
+ [(int_s390_tbeginc bdaddr12only:$BD1,
+ imm32zx16:$I2)]>;
+ }
+
+ // Transaction End
+ let hasSideEffects = 1, Defs = [CC], BD2 = 0 in
+ def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>;
+
+ // Transaction Abort
+ let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in
+ def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2),
+ "tabort\t$BD2",
+ [(int_s390_tabort bdaddr12only:$BD2)]>;
+
+ // Nontransactional Store
+ let hasSideEffects = 1 in
+ def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>;
+
+ // Extract Transaction Nesting Depth
+ let hasSideEffects = 1 in
+ def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureProcessorAssist] in {
+ let hasSideEffects = 1, R4 = 0 in
+ def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3),
+ "ppa\t$R1, $R2, $R3", []>;
+ def : Pat<(int_s390_ppa_txassist GR32:$src),
+ (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
+ 0, 1)>;
+}
+
+//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
//===----------------------------------------------------------------------===//
@@ -1382,6 +1440,13 @@ let Defs = [CC] in {
def : Pat<(ctlz GR64:$src),
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
+// Population count. Counts bits set per byte.
+let Predicates = [FeaturePopulationCount], Defs = [CC] in {
+ def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2),
+ "popcnt\t$R1, $R2",
+ [(set GR64:$R1, (z_popcnt GR64:$R2))]>;
+}
+
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
def : Pat<(i64 (anyext GR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 51ac5da..3151052 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -79,6 +79,9 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
+def SDT_ZTBegin : SDTypeProfile<0, 2,
+ [SDTCisPtrTy<0>,
+ SDTCisVT<1, i32>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@@ -121,6 +124,7 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS",
SDT_ZExtractAccess>;
+def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
@@ -179,6 +183,15 @@ def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
SDNPMemOperand]>;
+def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tend : SDNode<"SystemZISD::TEND", SDTNone,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+
//===----------------------------------------------------------------------===//
// Pattern fragments
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
index 1594854..15614c9 100644
--- a/lib/Target/SystemZ/SystemZProcessors.td
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature<
"Assume that the floating-point extension facility is installed"
>;
+def FeaturePopulationCount : SystemZFeature<
+ "population-count", "PopulationCount",
+ "Assume that the population-count facility is installed"
+>;
+
def FeatureFastSerialization : SystemZFeature<
"fast-serialization", "FastSerialization",
"Assume that the fast-serialization facility is installed"
@@ -50,13 +55,30 @@ def FeatureInterlockedAccess1 : SystemZFeature<
>;
def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
+def FeatureMiscellaneousExtensions : SystemZFeature<
+ "miscellaneous-extensions", "MiscellaneousExtensions",
+ "Assume that the miscellaneous-extensions facility is installed"
+>;
+
+def FeatureTransactionalExecution : SystemZFeature<
+ "transactional-execution", "TransactionalExecution",
+ "Assume that the transactional-execution facility is installed"
+>;
+
+def FeatureProcessorAssist : SystemZFeature<
+ "processor-assist", "ProcessorAssist",
+ "Assume that the processor-assist facility is installed"
+>;
+
def : Processor<"generic", NoItineraries, []>;
def : Processor<"z10", NoItineraries, []>;
def : Processor<"z196", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeatureFastSerialization,
- FeatureInterlockedAccess1]>;
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1]>;
def : Processor<"zEC12", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeatureFastSerialization,
- FeatureInterlockedAccess1]>;
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1,
+ FeatureMiscellaneousExtensions,
+ FeatureTransactionalExecution, FeatureProcessorAssist]>;
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index 31a2bff..de725ae 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -10,7 +10,6 @@
#include "SystemZSubtarget.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/Support/Host.h"
using namespace llvm;
@@ -28,10 +27,6 @@ SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
-#if defined(__linux__) && defined(__s390x__)
- if (CPUName == "generic")
- CPUName = sys::getHostCPUName();
-#endif
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
return *this;
@@ -43,7 +38,9 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT,
const TargetMachine &TM)
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
- HasFastSerialization(false), HasInterlockedAccess1(false),
+ HasPopulationCount(false), HasFastSerialization(false),
+ HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
+ HasTransactionalExecution(false), HasProcessorAssist(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 99cb1ad..c99e552 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -38,8 +38,12 @@ protected:
bool HasLoadStoreOnCond;
bool HasHighWord;
bool HasFPExtension;
+ bool HasPopulationCount;
bool HasFastSerialization;
bool HasInterlockedAccess1;
+ bool HasMiscellaneousExtensions;
+ bool HasTransactionalExecution;
+ bool HasProcessorAssist;
private:
Triple TargetTriple;
@@ -86,12 +90,26 @@ public:
// Return true if the target has the floating-point extension facility.
bool hasFPExtension() const { return HasFPExtension; }
+ // Return true if the target has the population-count facility.
+ bool hasPopulationCount() const { return HasPopulationCount; }
+
// Return true if the target has the fast-serialization facility.
bool hasFastSerialization() const { return HasFastSerialization; }
// Return true if the target has interlocked-access facility 1.
bool hasInterlockedAccess1() const { return HasInterlockedAccess1; }
+ // Return true if the target has the miscellaneous-extensions facility.
+ bool hasMiscellaneousExtensions() const {
+ return HasMiscellaneousExtensions;
+ }
+
+ // Return true if the target has the transactional-execution facility.
+ bool hasTransactionalExecution() const { return HasTransactionalExecution; }
+
+ // Return true if the target has the processor-assist facility.
+ bool hasProcessorAssist() const { return HasProcessorAssist; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 86baccb..b2f8175 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "SystemZTargetMachine.h"
+#include "SystemZTargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
@@ -108,3 +109,9 @@ void SystemZPassConfig::addPreEmitPass() {
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
return new SystemZPassConfig(this, PM);
}
+
+TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(SystemZTTIImpl(this, F));
+ });
+}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 181b926..5ded07c 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -39,6 +39,7 @@ public:
}
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
new file mode 100644
index 0000000..3337f63
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -0,0 +1,240 @@
+//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a TargetTransformInfo analysis pass specific to the
+// SystemZ target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "systemztti"
+
+//===----------------------------------------------------------------------===//
+//
+// SystemZ cost model.
+//
+//===----------------------------------------------------------------------===//
+
+unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // No cost model is implemented yet for operations on integers wider than 64 bits.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ if (Imm == 0)
+ return TTI::TCC_Free;
+
+ if (Imm.getBitWidth() <= 64) {
+ // Constants loaded via lgfi.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Basic;
+ // Constants loaded via llilf.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Basic;
+ // Constants loaded via llihf:
+ if ((Imm.getZExtValue() & 0xffffffff) == 0)
+ return TTI::TCC_Basic;
+
+ return 2 * TTI::TCC_Basic;
+ }
+
+ return 4 * TTI::TCC_Basic;
+}
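
The tiers above correspond to which immediate-load instruction can materialize the constant in a single shot. A hedged standalone classifier mirroring the checks in order (the TCC_Free/TCC_Basic values here are illustrative placeholders, and the instruction names follow the comments above):

    // Hedged model of the constant-materialization cost tiers above.
    #include <cassert>
    #include <cstdint>

    enum { TCC_Free = 0, TCC_Basic = 1 };   // illustrative values only

    static unsigned intImmCost64(int64_t Imm) {
      if (Imm == 0)
        return TCC_Free;
      uint64_t U = (uint64_t)Imm;
      if (Imm >= INT32_MIN && Imm <= INT32_MAX)
        return TCC_Basic;                   // lgfi (sign-extended 32-bit)
      if (U <= 0xffffffffu)
        return TCC_Basic;                   // llilf (low 32 bits)
      if ((U & 0xffffffffu) == 0)
        return TCC_Basic;                   // llihf (high 32 bits)
      return 2 * TCC_Basic;                 // needs two instructions
    }

    int main() {
      assert(intImmCost64(-1) == TCC_Basic);                        // lgfi
      assert(intImmCost64(INT64_C(0xdeadbeef)) == TCC_Basic);       // llilf
      assert(intImmCost64(INT64_C(0x12345678) << 32) == TCC_Basic); // llihf
      assert(intImmCost64(INT64_C(0x0000000100000001)) == 2 * TCC_Basic);
      return 0;
    }
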
+
+unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // No cost model is implemented yet for operations on integers wider than 64 bits.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ switch (Opcode) {
+ default:
+ return TTI::TCC_Free;
+ case Instruction::GetElementPtr:
+ // Always hoist the base address of a GetElementPtr. This prevents the
+ // creation of new constants for every base constant that gets constant
+ // folded with the offset.
+ if (Idx == 0)
+ return 2 * TTI::TCC_Basic;
+ return TTI::TCC_Free;
+ case Instruction::Store:
+ if (Idx == 0 && Imm.getBitWidth() <= 64) {
+ // Any 8-bit immediate store can by implemented via mvi.
+ if (BitSize == 8)
+ return TTI::TCC_Free;
+ // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi.
+ if (isInt<16>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::ICmp:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Comparisons against signed 32-bit immediates implemented via cgfi.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ // Comparisons against unsigned 32-bit immediates implemented via clgfi.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // We use algfi/slgfi to add/subtract 32-bit unsigned immediates.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Or their negation, by swapping addition vs. subtraction.
+ if (isUInt<32>(-Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Mul:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // We use msgfi to multiply by 32-bit signed immediates.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Masks supported by oilf/xilf.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Masks supported by oihf/xihf.
+ if ((Imm.getZExtValue() & 0xffffffff) == 0)
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::And:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Any 32-bit AND operation can be implemented via nilf.
+ if (BitSize <= 32)
+ return TTI::TCC_Free;
+ // 64-bit masks supported by nilf.
+ if (isUInt<32>(~Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // 64-bit masks supported by nilh.
+ if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
+ return TTI::TCC_Free;
+ // Some 64-bit AND operations can be implemented via risbg.
+ const SystemZInstrInfo *TII = ST->getInstrInfo();
+ unsigned Start, End;
+ if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // Always return TCC_Free for the shift value of a shift instruction.
+ if (Idx == 1)
+ return TTI::TCC_Free;
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ case Instruction::BitCast:
+ case Instruction::PHI:
+ case Instruction::Call:
+ case Instruction::Select:
+ case Instruction::Ret:
+ case Instruction::Load:
+ break;
+ }
+
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+}
+
+unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // No cost model is implemented yet for operations on integers wider than 64 bits.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ switch (IID) {
+ default:
+ return TTI::TCC_Free;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ // These get expanded to include a normal addition/subtraction.
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ if (isUInt<32>(-Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ // These get expanded to include a normal multiplication.
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Intrinsic::experimental_stackmap:
+ if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
+ }
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+}
+
+TargetTransformInfo::PopcntSupportKind
+SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
+ assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2");
+ if (ST->hasPopulationCount() && TyWidth <= 64)
+ return TTI::PSK_FastHardware;
+ return TTI::PSK_Software;
+}
+
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
new file mode 100644
index 0000000..d498913
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -0,0 +1,70 @@
+//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
+
+#include "SystemZTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+
+class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
+ typedef BasicTTIImplBase<SystemZTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const SystemZSubtarget *ST;
+ const SystemZTargetLowering *TLI;
+
+ const SystemZSubtarget *getST() const { return ST; }
+ const SystemZTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F)
+ : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ SystemZTTIImpl(const SystemZTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ SystemZTTIImpl(SystemZTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ SystemZTTIImpl &operator=(const SystemZTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ SystemZTTIImpl &operator=(SystemZTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 75100fb..db543f3 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -110,7 +110,7 @@ MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase(
NameStr += DL->getPrivateGlobalPrefix();
TM.getNameWithPrefix(NameStr, GV, Mang);
NameStr.append(Suffix.begin(), Suffix.end());
- return Ctx->GetOrCreateSymbol(NameStr.str());
+ return Ctx->GetOrCreateSymbol(NameStr);
}
MCSymbol *TargetLoweringObjectFile::getCFIPersonalitySymbol(
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index dd07f81..5807cf7 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -187,5 +187,5 @@ MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const {
SmallString<60> NameStr;
getNameWithPrefix(NameStr, GV, Mang);
const TargetLoweringObjectFile *TLOF = getObjFileLowering();
- return TLOF->getContext().GetOrCreateSymbol(NameStr.str());
+ return TLOF->getContext().GetOrCreateSymbol(NameStr);
}
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index 236cb1b..1a5bf16 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -183,7 +183,9 @@ void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T,
}
static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
- formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) {
+ raw_pwrite_stream &OS,
+ LLVMCodeGenFileType codegen,
+ char **ErrorMessage) {
TargetMachine* TM = unwrap(T);
Module* Mod = unwrap(M);
@@ -229,8 +231,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
*ErrorMessage = strdup(EC.message().c_str());
return true;
}
- formatted_raw_ostream destf(dest);
- bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage);
+ bool Result = LLVMTargetMachineEmit(T, M, dest, codegen, ErrorMessage);
dest.flush();
return Result;
}
@@ -238,15 +239,14 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T,
LLVMModuleRef M, LLVMCodeGenFileType codegen, char** ErrorMessage,
LLVMMemoryBufferRef *OutMemBuf) {
- std::string CodeString;
- raw_string_ostream OStream(CodeString);
- formatted_raw_ostream Out(OStream);
- bool Result = LLVMTargetMachineEmit(T, M, Out, codegen, ErrorMessage);
+ SmallString<0> CodeString;
+ raw_svector_ostream OStream(CodeString);
+ bool Result = LLVMTargetMachineEmit(T, M, OStream, codegen, ErrorMessage);
OStream.flush();
- std::string &Data = OStream.str();
- *OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.c_str(),
- Data.length(), "");
+ StringRef Data = OStream.str();
+ *OutMemBuf =
+ LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.data(), Data.size(), "");
return Result;
}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index c24805a..93c6ea0 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2571,7 +2571,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallString<16> Tmp;
Tmp += Base;
Tmp += ' ';
- Op.setTokenValue(Tmp.str());
+ Op.setTokenValue(Tmp);
// If this instruction starts with an 'f', then it is a floating point stack
// instruction. These come in up to three forms for 32-bit, 64-bit, and
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 65461af..f265f1d 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -33,15 +33,12 @@ using namespace llvm;
#define PRINT_ALIAS_INSTR
#include "X86GenAsmWriter.inc"
-void X86ATTInstPrinter::printRegName(raw_ostream &OS,
- unsigned RegNo) const {
- OS << markup("<reg:")
- << '%' << getRegisterName(RegNo)
- << markup(">");
+void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">");
}
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
uint64_t TSFlags = Desc.TSFlags;
@@ -60,7 +57,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
// InstrInfo.td as soon as Requires clause is supported properly
// for InstAlias.
if (MI->getOpcode() == X86::CALLpcrel32 &&
- (getAvailableFeatures() & X86::Mode64Bit) != 0) {
+ (STI.getFeatureBits() & X86::Mode64Bit) != 0) {
OS << "\tcallq\t";
printPCRelImm(MI, 0, OS);
}
@@ -169,8 +166,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printRegName(O, Op.getReg());
} else if (Op.isImm()) {
// Print X86 immediates as signed values.
- O << markup("<imm:")
- << '$' << formatImm((int64_t)Op.getImm())
+ O << markup("<imm:") << '$' << formatImm((int64_t)Op.getImm())
<< markup(">");
// If there are no instruction-specific comments, add a comment clarifying
@@ -182,24 +178,22 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << markup("<imm:")
- << '$' << *Op.getExpr()
- << markup(">");
+ O << markup("<imm:") << '$' << *Op.getExpr() << markup(">");
}
}
void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg);
- const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
- const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
- const MCOperand &SegReg = MI->getOperand(Op+X86::AddrSegmentReg);
+ const MCOperand &BaseReg = MI->getOperand(Op + X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg);
+ const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp);
+ const MCOperand &SegReg = MI->getOperand(Op + X86::AddrSegmentReg);
O << markup("<mem:");
// If this has a segment register, print it.
if (SegReg.getReg()) {
- printOperand(MI, Op+X86::AddrSegmentReg, O);
+ printOperand(MI, Op + X86::AddrSegmentReg, O);
O << ':';
}
@@ -215,16 +209,14 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
if (IndexReg.getReg() || BaseReg.getReg()) {
O << '(';
if (BaseReg.getReg())
- printOperand(MI, Op+X86::AddrBaseReg, O);
+ printOperand(MI, Op + X86::AddrBaseReg, O);
if (IndexReg.getReg()) {
O << ',';
- printOperand(MI, Op+X86::AddrIndexReg, O);
- unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm();
+ printOperand(MI, Op + X86::AddrIndexReg, O);
+ unsigned ScaleVal = MI->getOperand(Op + X86::AddrScaleAmt).getImm();
if (ScaleVal != 1) {
- O << ','
- << markup("<imm:")
- << ScaleVal // never printed in hex.
+ O << ',' << markup("<imm:") << ScaleVal // never printed in hex.
<< markup(">");
}
}
@@ -236,13 +228,13 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- const MCOperand &SegReg = MI->getOperand(Op+1);
+ const MCOperand &SegReg = MI->getOperand(Op + 1);
O << markup("<mem:");
// If this has a segment register, print it.
if (SegReg.getReg()) {
- printOperand(MI, Op+1, O);
+ printOperand(MI, Op + 1, O);
O << ':';
}
@@ -267,13 +259,13 @@ void X86ATTInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &DispSpec = MI->getOperand(Op);
- const MCOperand &SegReg = MI->getOperand(Op+1);
+ const MCOperand &SegReg = MI->getOperand(Op + 1);
O << markup("<mem:");
// If this has a segment register, print it.
if (SegReg.getReg()) {
- printOperand(MI, Op+1, O);
+ printOperand(MI, Op + 1, O);
O << ':';
}
@@ -289,7 +281,6 @@ void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- O << markup("<imm:")
- << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff)
+ O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff)
<< markup(">");
}
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index f71cb81..62b6b73 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -24,14 +24,12 @@ class MCOperand;
class X86ATTInstPrinter final : public MCInstPrinter {
public:
X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI)
- : MCInstPrinter(MAI, MII, MRI) {
- // Initialize the set of available features.
- setAvailableFeatures(STI.getFeatureBits());
- }
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
@@ -142,7 +140,6 @@ public:
private:
bool HasCustomInstComment;
};
-
}
#endif
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 91d1828..4d92daf 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -33,7 +33,8 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
uint64_t TSFlags = Desc.TSFlags;
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 2150144..6e371da 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -28,7 +28,8 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index a400d46..b84c983 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -360,7 +360,7 @@ public:
ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
: ELFX86AsmBackend(T, OSABI, CPU) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386);
}
};
@@ -370,7 +370,7 @@ public:
ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
: ELFX86AsmBackend(T, OSABI, CPU) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI,
ELF::EM_X86_64);
}
@@ -381,7 +381,7 @@ public:
ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
: ELFX86AsmBackend(T, OSABI, CPU) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, ELF::EM_X86_64);
}
};
@@ -395,7 +395,7 @@ public:
, Is64Bit(is64Bit) {
}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86WinCOFFObjectWriter(OS, Is64Bit);
}
};
@@ -752,7 +752,7 @@ public:
StringRef CPU)
: DarwinX86AsmBackend(T, MRI, CPU, false) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
MachO::CPU_TYPE_I386,
MachO::CPU_SUBTYPE_I386_ALL);
@@ -772,7 +772,7 @@ public:
StringRef CPU, MachO::CPUSubTypeX86 st)
: DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
MachO::CPU_TYPE_X86_64, Subtype);
}
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 76a9d2b..4508883 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -22,7 +22,8 @@ namespace {
public:
X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine);
- virtual ~X86ELFObjectWriter();
+ ~X86ELFObjectWriter() override;
+
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
@@ -248,9 +249,8 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
return getRelocType32(Modifier, getType32(Type), IsPCRel);
}
-MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS,
- bool IsELF64,
- uint8_t OSABI,
+MCObjectWriter *llvm::createX86ELFObjectWriter(raw_pwrite_stream &OS,
+ bool IsELF64, uint8_t OSABI,
uint16_t EMachine) {
MCELFObjectTargetWriter *MOTW =
new X86ELFObjectWriter(IsELF64, OSABI, EMachine);
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 9b98a3e..e27b7cb 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -39,7 +39,7 @@ public:
: MCII(mcii), Ctx(ctx) {
}
- ~X86MCCodeEmitter() {}
+ ~X86MCCodeEmitter() override {}
bool is64BitMode(const MCSubtargetInfo &STI) const {
return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 0946326..5bdd844 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -80,7 +80,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
std::string ArchFS = X86_MC::ParseX86Triple(TT);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS.str();
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
else
ArchFS = FS;
}
@@ -207,14 +207,13 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+static MCInstPrinter *createX86MCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI, MII, MRI, STI);
+ return new X86ATTInstPrinter(MAI, MII, MRI);
if (SyntaxVariant == 1)
return new X86IntelInstPrinter(MAI, MII, MRI);
return nullptr;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 6f50f11..dcdae1d 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -31,10 +31,11 @@ class Target;
class Triple;
class StringRef;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheX86_32Target, TheX86_64Target;
-/// DWARFFlavour - Flavour of dwarf regnumbers
+/// Flavour of dwarf regnumbers
///
namespace DWARFFlavour {
enum {
@@ -42,7 +43,7 @@ namespace DWARFFlavour {
};
}
-/// N86 namespace - Native X86 register numbers
+/// Native X86 register numbers
///
namespace N86 {
enum {
@@ -57,9 +58,8 @@ namespace X86_MC {
void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
- /// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance.
- /// This is exposed so Asm parser, etc. do not need to go through
- /// TargetRegistry.
+ /// Create an X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc.
+ /// do not need to go through TargetRegistry.
MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS);
}
@@ -78,27 +78,25 @@ MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
///
/// Takes ownership of \p AB and \p CE.
MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
- raw_ostream &OS, MCCodeEmitter *CE,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE,
bool RelaxAll);
-/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
-MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
+/// Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint32_t CPUType,
uint32_t CPUSubtype);
-/// createX86ELFObjectWriter - Construct an X86 ELF object writer.
-MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS,
- bool IsELF64,
- uint8_t OSABI,
- uint16_t EMachine);
-/// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer.
-MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit);
+/// Construct an X86 ELF object writer.
+MCObjectWriter *createX86ELFObjectWriter(raw_pwrite_stream &OS, bool IsELF64,
+ uint8_t OSABI, uint16_t EMachine);
+/// Construct an X86 Win COFF object writer.
+MCObjectWriter *createX86WinCOFFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit);
-/// createX86_64MachORelocationInfo - Construct X86-64 Mach-O relocation info.
+/// Construct X86-64 Mach-O relocation info.
MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx);
-/// createX86_64ELFORelocationInfo - Construct X86-64 ELF relocation info.
+/// Construct X86-64 ELF relocation info.
MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx);
} // End llvm namespace
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 7a83f4c..38539cd 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -575,9 +575,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint32_t CPUType,
+MCObjectWriter *llvm::createX86MachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
CPUType,
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index e1df5c2..bd1bc99 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -25,7 +25,7 @@ namespace {
class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
public:
X86WinCOFFObjectWriter(bool Is64Bit);
- virtual ~X86WinCOFFObjectWriter();
+ ~X86WinCOFFObjectWriter() override;
unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsCrossSection,
@@ -90,7 +90,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target,
llvm_unreachable("Unsupported COFF machine type.");
}
-MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit) {
MCWinCOFFObjectTargetWriter *MOTW = new X86WinCOFFObjectWriter(Is64Bit);
return createWinCOFFObjectWriter(MOTW, OS);
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 5690efe..92f42b6 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -18,8 +18,8 @@ class X86WinCOFFStreamer : public MCWinCOFFStreamer {
Win64EH::UnwindEmitter EHStreamer;
public:
X86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter *CE,
- raw_ostream &OS)
- : MCWinCOFFStreamer(C, AB, *CE, OS) { }
+ raw_pwrite_stream &OS)
+ : MCWinCOFFStreamer(C, AB, *CE, OS) {}
void EmitWinEHHandlerData() override;
void EmitWindowsUnwindTables() override;
@@ -49,8 +49,8 @@ void X86WinCOFFStreamer::FinishImpl() {
}
MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
- raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll) {
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *CE, bool RelaxAll) {
X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS);
S->getAssembler().setRelaxAll(RelaxAll);
return S;
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 4f9836d..d13f155 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -273,17 +273,15 @@ def : SilvermontProc<"silvermont">;
def : SilvermontProc<"slm">; // Legacy alias.
// "Arrandale" along with corei3 and corei5
-class NehalemProc<string Name, list<SubtargetFeature> AdditionalFeatures>
- : ProcessorModel<Name, SandyBridgeModel, !listconcat([
- FeatureSSE42,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureFastUAMem,
- FeaturePOPCNT
- ],
- AdditionalFeatures)>;
-def : NehalemProc<"nehalem", []>;
-def : NehalemProc<"corei7", [FeatureAES]>;
+class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
+ FeatureSSE42,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureFastUAMem,
+ FeaturePOPCNT
+ ]>;
+def : NehalemProc<"nehalem">;
+def : NehalemProc<"corei7">;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index f6033a7..2ed4975 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -523,7 +523,6 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
// must be registered in .sxdata. Use of any unregistered handlers will
// cause the process to terminate immediately. LLVM does not know how to
// register any SEH handlers, so its object files should be safe.
- S->setAbsolute();
OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
OutStreamer.EmitAssignment(
S, MCConstantExpr::Create(int64_t(1), MMI->getContext()));
@@ -723,28 +722,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
}
- if (TT.isOSBinFormatELF()) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
-
- for (const auto &Stub : Stubs) {
- OutStreamer.EmitLabel(Stub.first);
- OutStreamer.EmitSymbolValue(Stub.second.getPointer(),
- TD->getPointerSize());
- }
- Stubs.clear();
- }
-
+ if (TT.isOSBinFormatELF())
SM.serializeToStackMapSection();
- }
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index cba140f..cdf10a7 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -2417,6 +2417,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
// FIXME may need to add RegState::Debug to any registers produced,
// although ESP/EBP should be the only ones at the moment.
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
.addImm(0)
.addMetadata(DI->getVariable())
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index fb12ce5..5da7acf 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2187,7 +2187,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
break;
- unsigned ShlOp, Op;
+ unsigned ShlOp, AddOp, Op;
MVT CstVT = NVT;
// Check the minimum bitwidth for the new constant.
@@ -2208,6 +2208,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i32:
assert(CstVT == MVT::i8);
ShlOp = X86::SHL32ri;
+ AddOp = X86::ADD32rr;
switch (Opcode) {
default: llvm_unreachable("Impossible opcode");
@@ -2219,6 +2220,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i64:
assert(CstVT == MVT::i8 || CstVT == MVT::i32);
ShlOp = X86::SHL64ri;
+ AddOp = X86::ADD64rr;
switch (Opcode) {
default: llvm_unreachable("Impossible opcode");
@@ -2232,6 +2234,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Emit the smaller op and the shift.
SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
+ if (ShlVal == 1)
+ return CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
+ SDValue(New, 0));
return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
getI8Imm(ShlVal));
}
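
The ShlVal == 1 special case added above leans on the identity x << 1 == x + x, which is why the selector can emit ADD32rr/ADD64rr instead of a shift by one; a register-register add is typically at least as cheap as a shift by an immediate on x86 cores. A minimal standalone check of that identity (plain C++, illustration only, not part of the patch):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // x << 1 and x + x produce the same value for unsigned integers,
    // including when the top bit is shifted out (both wrap modulo 2^N).
    int main() {
      for (uint32_t x : {0u, 1u, 0x7fffffffu, 0xdeadbeefu})
        assert((x << 1) == x + x);
      for (uint64_t x : {0ull, 1ull, 0x8000000000000000ull})
        assert((x << 1) == x + x);
      return 0;
    }
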
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8b92e70..c32412a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -2142,6 +2143,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile*/false, /*AlwaysInline=*/true,
+ /*isTailCall*/false,
MachinePointerInfo(), MachinePointerInfo());
}
@@ -2277,6 +2279,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
const Function* Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
@@ -2416,6 +2419,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
MFI->CreateFixedObject(1, StackSize, true));
}
+ MachineModuleInfo &MMI = MF.getMMI();
+ const Function *WinEHParent = nullptr;
+ if (IsWin64 && MMI.hasWinEHFuncInfo(Fn))
+ WinEHParent = MMI.getWinEHParent(Fn);
+ bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn;
+ bool IsWinEHParent = WinEHParent && WinEHParent == Fn;
+
// Figure out if XMM registers are in use.
assert(!(MF.getTarget().Options.UseSoftFloat &&
Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
@@ -2452,7 +2462,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
}
if (IsWin64) {
- const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
@@ -2505,6 +2514,27 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+ } else if (IsWinEHOutlined) {
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject(
+ /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false));
+
+ MMI.getWinEHFuncInfo(Fn)
+ .CatchHandlerParentFrameObjIdx[const_cast<Function *>(Fn)] =
+ FuncInfo->getRegSaveFrameIndex();
+
+ // Store the second integer parameter (rdx) into rsp+16 relative to the
+ // stack pointer at the entry of the function.
+ SDValue RSFIN =
+ DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy());
+ unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64);
+ Chain = DAG.getStore(
+ Val.getValue(1), dl, Val, RSFIN,
+ MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()),
+ /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0);
}
if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
@@ -2571,6 +2601,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setArgumentStackSize(StackSize);
+ if (IsWinEHParent) {
+ int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
+ SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
+ MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI;
+ SDValue Neg2 = DAG.getConstant(-2, MVT::i64);
+ Chain = DAG.getStore(Chain, dl, Neg2, StackSlot,
+ MachinePointerInfo::getFixedStack(UnwindHelpFI),
+ /*isVolatile=*/true,
+ /*isNonTemporal=*/false, /*Alignment=*/0);
+ }
+
return Chain;
}
@@ -4420,6 +4461,29 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
SDLoc dl(Op);
SDValue V;
bool First = true;
+
+ // SSE4.1 - use PINSRB to insert each byte directly.
+ if (Subtarget->hasSSE41()) {
+ for (unsigned i = 0; i < 16; ++i) {
+ bool isNonZero = (NonZeros & (1 << i)) != 0;
+ if (isNonZero) {
+ if (First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v16i8);
+ First = false;
+ }
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ MVT::v16i8, V, Op.getOperand(i),
+ DAG.getIntPtrConstant(i));
+ }
+ }
+
+ return V;
+ }
+
+ // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
@@ -5650,14 +5714,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
+ // We can't directly insert an i8 or i16 into a vector, so zero extend
+ // it to i32 first.
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
if (VT.is256BitVector()) {
- SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
- Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
+ if (Subtarget->hasAVX()) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
+ } else {
+ // Without AVX, we need to extend to a 128-bit vector and then
+ // insert into the 256-bit vector.
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
+ SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
+ Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
+ }
} else {
assert(VT.is128BitVector() && "Expected an SSE value type!");
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
@@ -5877,7 +5951,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
unsigned NumElems = ResVT.getVectorNumElements();
- if(ResVT.is256BitVector())
+ if (ResVT.is256BitVector())
return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
if (Op.getNumOperands() == 4) {
@@ -9281,15 +9355,6 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3}))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
- // If we have a single input to the zero element, insert that into V1 if we
- // can do so cheaply.
- int NumV2Elements =
- std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
- if (NumV2Elements == 1 && Mask[0] >= 4)
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
- return Insertion;
-
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG))
return Blend;
@@ -9432,15 +9497,6 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- // If we have a single input to the zero element, insert that into V1 if we
- // can do so cheaply.
- int NumV2Elements =
- std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 8; });
- if (NumV2Elements == 1 && Mask[0] >= 8)
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
- return Insertion;
-
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
Subtarget, DAG))
return Blend;
@@ -9811,6 +9867,18 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
+ // If we have a single input to the zero element, insert that into V1 if we
+ // can do so cheaply.
+ int NumElts = VT.getVectorNumElements();
+ int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [NumElts](int M) {
+ return M >= NumElts;
+ });
+
+ if (NumV2Elements == 1 && Mask[0] >= NumElts)
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return Insertion;
+
// There is a really nice hard cut-over between AVX1 and AVX2 that means we can
// check for those subtargets here and avoid much of the subtarget querying in
// the per-vector-type lowering routines. With AVX1 we have essentially *zero*
@@ -11903,7 +11971,7 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
// Now we have only mask extension
assert(InVT.getVectorElementType() == MVT::i1);
SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
- const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+ const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue();
SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
@@ -11979,7 +12047,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
}
SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
- const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+ const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue();
SDValue CP = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
@@ -12750,6 +12818,16 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
return SDValue();
}
+/// If we have at least two divisions that use the same divisor, convert to
+/// multiplication by a reciprocal. This may need to be adjusted for a given
+/// CPU if a division's cost is not at least twice the cost of a multiplication.
+/// This is because we still need one division to calculate the reciprocal and
+/// then we need two multiplies by that reciprocal as replacements for the
+/// original divisions.
+bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+ return NumUsers > 1;
+}
+
static bool isAllOnes(SDValue V) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
return C && C->isAllOnesValue();
@@ -14427,7 +14505,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
- false,
+ false, false,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
@@ -15220,10 +15298,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
}
case PREFETCH: {
SDValue Hint = Op.getOperand(6);
- unsigned HintVal;
- if (dyn_cast<ConstantSDNode> (Hint) == nullptr ||
- (HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1)
- llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1");
+ unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue();
+ assert(HintVal < 2 && "Wrong prefetch hint in intrinsic: should be 0 or 1");
unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
SDValue Mask = Op.getOperand(2);
@@ -24175,7 +24251,7 @@ TargetLowering::ConstraintWeight
break;
case 'G':
case 'C':
- if (dyn_cast<ConstantFP>(CallOperandVal)) {
+ if (isa<ConstantFP>(CallOperandVal)) {
weight = CW_Constant;
}
break;
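
The SSE4.1 path added in LowerBuildVectorv16i8 above inserts each non-zero byte of a v16i8 build_vector directly instead of merging byte pairs for PINSRW. A rough user-level analogue with intrinsics (illustration only; the byte values and lane indices here are arbitrary, and the code must be built with SSE4.1 enabled, e.g. -msse4.1):

    #include <smmintrin.h>   // SSE4.1 intrinsics; _mm_insert_epi8 maps to pinsrb
    #include <cstdint>
    #include <cstdio>

    int main() {
      // Start from a zeroed vector and insert individual bytes, one pinsrb each.
      __m128i v = _mm_setzero_si128();
      v = _mm_insert_epi8(v, 0x11, 0);
      v = _mm_insert_epi8(v, 0x22, 7);
      v = _mm_insert_epi8(v, 0x33, 15);

      alignas(16) uint8_t bytes[16];
      _mm_store_si128(reinterpret_cast<__m128i *>(bytes), v);
      std::printf("%02x %02x %02x\n",
                  static_cast<unsigned>(bytes[0]),
                  static_cast<unsigned>(bytes[7]),
                  static_cast<unsigned>(bytes[15]));  // prints 11 22 33
      return 0;
    }
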
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index dd20ec2..5130c37 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1072,6 +1072,9 @@ namespace llvm {
/// Use rcp* to speed up fdiv calculations.
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const override;
+
+ /// Reassociate floating point divisions into multiply by reciprocal.
+ bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
};
namespace X86 {
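
The combineRepeatedFPDivisors hook declared above opts X86 into the generic DAG combine that rewrites several divisions sharing a divisor into one reciprocal plus multiplications whenever there is more than one user. In scalar terms the rewrite looks like the sketch below (the real combine operates on SDNodes and only fires under relaxed FP semantics; the values here are arbitrary):

    #include <cstdio>

    int main() {
      double x = 3.0, y = 5.0, d = 7.0;

      // Original form: two independent fdivs sharing the divisor d.
      double q1 = x / d;
      double q2 = y / d;

      // Rewritten form: one fdiv to form the reciprocal, then two fmuls.
      double r = 1.0 / d;
      double m1 = x * r;
      double m2 = y * r;

      // The results agree only up to rounding, which is why the transform is
      // gated on fast-math style flags rather than applied unconditionally.
      std::printf("%.17g %.17g\n%.17g %.17g\n", q1, q2, m1, m2);
      return 0;
    }
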
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 509602f..0959162 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -2971,60 +2971,36 @@ multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
itins, HasBWI, IsCommutable>;
}
-multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
- ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
- PatFrag memop_frag, X86MemOperand x86memop,
- PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
- string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
- let isCommutable = IsCommutable in
- {
- def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX_4V;
- def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], itins.rr>, EVEX_4V, EVEX_K;
- def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}} {z}" ,
- "|$dst {${mask}} {z}, $src1, $src2}"),
- [], itins.rr>, EVEX_4V, EVEX_KZ;
- }
+multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
+ SDNode OpNode,X86VectorVTInfo _Src,
+ X86VectorVTInfo _Dst, bit IsCommutable = 0> {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
+ "$src2, $src1","$src1, $src2",
+ (_Dst.VT (OpNode
+ (_Src.VT _Src.RC:$src1),
+ (_Src.VT _Src.RC:$src2))),
+ "",itins.rr, IsCommutable>,
+ AVX512BIBase, EVEX_4V;
let mayLoad = 1 in {
- def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX_4V;
- def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], itins.rm>, EVEX_4V, EVEX_K;
- def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
- [], itins.rm>, EVEX_4V, EVEX_KZ;
- def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
- ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
- [], itins.rm>, EVEX_4V, EVEX_B;
- def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
- BrdcstStr, "}"),
- [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
- def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
- BrdcstStr, "}"),
- [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
+ (bitconvert (_Src.LdFrag addr:$src2)))),
+ "", itins.rm>,
+ AVX512BIBase, EVEX_4V;
+
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2),
+ OpcodeStr,
+ "${src2}"##_Dst.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_Dst.BroadcastStr,
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bc_v16i32
+ (_Dst.VT (X86VBroadcast
+ (_Dst.ScalarLdFrag addr:$src2)))))),
+ "", itins.rm>,
+ AVX512BIBase, EVEX_4V, EVEX_B;
}
}
@@ -3039,24 +3015,13 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
-defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
- loadv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512,
- EVEX_CD8<64, CD8VF>, VEX_W;
-
-defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
- loadv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
+ X86pmuldq, v16i32_info, v8i64_info, 1>,
+ T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
-def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
- (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
-
-def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
-def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMULDQZrr VR512:$src1, VR512:$src2)>;
+defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
+ X86pmuludq, v16i32_info, v8i64_info, 1>,
+ EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
@@ -3208,7 +3173,7 @@ defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
SSE_INTALU_ITINS_P, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
- SSE_INTALU_ITINS_P, HasAVX512, 1>;
+ SSE_INTALU_ITINS_P, HasAVX512, 0>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
@@ -3743,16 +3708,19 @@ multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
- OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ),
- (OpNode _.RC:$src1, _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
+ OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
+ !strconcat("$src2, ${src3}", _.BroadcastStr ),
+ (OpNode _.RC:$src1,
+ _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
}
} // Constraints = "$src1 = $dst"
let Constraints = "$src1 = $dst" in {
// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
-multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- SDPatternOperator OpNode> {
+multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _,
+ SDPatternOperator OpNode> {
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
@@ -3772,7 +3740,6 @@ multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
SDPatternOperator OpNode> {
defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
-
defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
VTI>, EVEX_CD8<VTI.EltSize, CD8VF>;
}
@@ -3794,12 +3761,14 @@ let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedDouble in {
defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
v8f64_info, OpNode>,
- avx512_fma3_round_forms<opc213, OpcodeStr,
- v8f64_info, OpNodeRnd>, EVEX_V512, VEX_W;
+ avx512_fma3_round_forms<opc213, OpcodeStr, v8f64_info,
+ OpNodeRnd>, EVEX_V512, VEX_W;
defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v4f64x_info, OpNode>, EVEX_V256, VEX_W;
+ v4f64x_info, OpNode>,
+ EVEX_V256, VEX_W;
defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v2f64x_info, OpNode>, EVEX_V128, VEX_W;
+ v2f64x_info, OpNode>,
+ EVEX_V128, VEX_W;
}
}
@@ -3830,26 +3799,29 @@ multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
} // Constraints = "$src1 = $dst"
-
-multiclass avx512_fma3p_m132_f<bits<8> opc,
- string OpcodeStr,
- SDNode OpNode> {
+multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr, SDNode OpNode> {
let ExeDomain = SSEPackedSingle in {
defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode,v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ OpNode,v16f32_info>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>;
+ OpNode, v8f32x_info>, EVEX_V256,
+ EVEX_CD8<32, CD8VF>;
defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>;
+ OpNode, v4f32x_info>, EVEX_V128,
+ EVEX_CD8<32, CD8VF>;
}
let ExeDomain = SSEPackedDouble in {
defm NAME##PDZ : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VF>;
+ OpNode, v8f64_info>, EVEX_V512,
+ VEX_W, EVEX_CD8<32, CD8VF>;
defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<32, CD8VF>;
+ OpNode, v4f64x_info>, EVEX_V256,
+ VEX_W, EVEX_CD8<32, CD8VF>;
defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<32, CD8VF>;
+ OpNode, v2f64x_info>, EVEX_V128,
+ VEX_W, EVEX_CD8<32, CD8VF>;
}
}
@@ -3860,7 +3832,6 @@ defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>;
defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>;
defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>;
-
// Scalar FMA
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3883,7 +3854,6 @@ multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpVT (OpNode RC:$src2, RC:$src1,
(mem_frag addr:$src3))))]>;
}
-
} // Constraints = "$src1 = $dst"
defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
@@ -3920,6 +3890,7 @@ let hasSideEffects = 0 in {
EVEX_4V;
} // hasSideEffects = 0
}
+
let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 78efc4d..5e19ad4 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1216,10 +1216,10 @@ def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
let isCompare = 1 in {
let Defs = [EFLAGS] in {
let isCommutable = 1 in {
- def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>;
- def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>;
- def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>;
- def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>;
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat>;
+ def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat>;
+ def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat>;
+ def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat>;
} // isCommutable
def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>;
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 18bbe5d..45e6d0a 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1232,7 +1232,11 @@ def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
// with implicit zero-extension instead of a 64-bit and if the immediate has at
// least 32 bits of leading zeros. If in addition the last 32 bits can be
// represented with a sign extension of a 8 bit constant, use that.
+// This can also reduce instruction size by eliminating the need for the REX
+// prefix.
+// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.
+let AddedComplexity = 1 in {
def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
(SUBREG_TO_REG
(i64 0),
@@ -1248,8 +1252,13 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm),
(EXTRACT_SUBREG GR64:$src, sub_32bit),
(i32 (GetLo32XForm imm:$imm))),
sub_32bit)>;
+} // AddedComplexity = 1
+// AddedComplexity is needed due to the increased complexity on the
+// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all
+// the MOVZX patterns keeps them together in DAGIsel tables.
+let AddedComplexity = 1 in {
// r & (2^16-1) ==> movz
def : Pat<(and GR32:$src1, 0xffff),
(MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
@@ -1272,6 +1281,7 @@ def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
sub_32bit)>;
// r & (2^16-1) ==> movz
+let AddedComplexity = 1 in // Give priority over i64immZExt32.
def : Pat<(and GR64:$src, 0xffff),
(SUBREG_TO_REG (i64 0),
(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
@@ -1290,6 +1300,7 @@ def : Pat<(and GR16:$src1, 0xff),
(EXTRACT_SUBREG (MOVZX32rr8 (i8
(EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
Requires<[In64BitMode]>;
+} // AddedComplexity = 1
// sext_inreg patterns
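
Both groups of patterns above rely on the fact that 32-bit operations on x86-64 implicitly zero the upper 32 bits of the destination, so a 64-bit AND whose mask has at least 32 leading zero bits can be performed in the 32-bit register class (often avoiding a REX prefix), and the 0xff/0xffff masks reduce to MOVZX. A quick check of the underlying identity (plain C++, not tied to the TableGen patterns themselves; the test value is arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t x = 0x1234567890abcdefULL;

      // Masking with a constant that has >= 32 leading zero bits is the same
      // as truncating and zero-extending back, which a 32-bit AND (or MOVZX
      // for the 0xff / 0xffff cases) gives for free on x86-64.
      assert((x & 0x00000000FFFFFFFFULL) == static_cast<uint32_t>(x));
      assert((x & 0xFFFFULL) == static_cast<uint16_t>(x));
      assert((x & 0xFFULL)   == static_cast<uint8_t>(x));
      return 0;
    }
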
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 0bdabdf..b75a9f4 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -631,53 +631,53 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_load node:$src1, node:$src2, node:$src3), [{
- if (dyn_cast<MaskedLoadSDNode>(N))
- return cast<MaskedLoadSDNode>(N)->getAlignment() >= 16;
+ if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
+ return Load->getAlignment() >= 16;
return false;
}]>;
def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_load node:$src1, node:$src2, node:$src3), [{
- if (dyn_cast<MaskedLoadSDNode>(N))
- return cast<MaskedLoadSDNode>(N)->getAlignment() >= 32;
+ if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
+ return Load->getAlignment() >= 32;
return false;
}]>;
def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_load node:$src1, node:$src2, node:$src3), [{
- if (dyn_cast<MaskedLoadSDNode>(N))
- return cast<MaskedLoadSDNode>(N)->getAlignment() >= 64;
+ if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
+ return Load->getAlignment() >= 64;
return false;
}]>;
def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_load node:$src1, node:$src2, node:$src3), [{
- return (dyn_cast<MaskedLoadSDNode>(N) != 0);
+ return isa<MaskedLoadSDNode>(N);
}]>;
def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{
- if (dyn_cast<MaskedStoreSDNode>(N))
- return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16;
+ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
+ return Store->getAlignment() >= 16;
return false;
}]>;
def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{
- if (dyn_cast<MaskedStoreSDNode>(N))
- return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32;
+ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
+ return Store->getAlignment() >= 32;
return false;
}]>;
def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{
- if (dyn_cast<MaskedStoreSDNode>(N))
- return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64;
+ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
+ return Store->getAlignment() >= 64;
return false;
}]>;
def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{
- return (dyn_cast<MaskedStoreSDNode>(N) != 0);
+ return isa<MaskedStoreSDNode>(N);
}]>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 538ec1c..fbfd868 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -559,6 +559,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 },
{ X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 },
+ // 3DNow! versions of foldable instructions
+ { X86::PF2IDrr, X86::PF2IDrm, 0 },
+ { X86::PF2IWrr, X86::PF2IWrm, 0 },
+ { X86::PFRCPrr, X86::PFRCPrm, 0 },
+ { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 },
+ { X86::PI2FDrr, X86::PI2FDrm, 0 },
+ { X86::PI2FWrr, X86::PI2FWrm, 0 },
+ { X86::PSWAPDrr, X86::PSWAPDrm, 0 },
+
// AVX 128-bit versions of foldable instructions
{ X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
{ X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
@@ -943,6 +952,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
{ X86::CMPSDrr, X86::CMPSDrm, 0 },
{ X86::CMPSSrr, X86::CMPSSrm, 0 },
+ { X86::CRC32r32r32, X86::CRC32r32m32, 0 },
+ { X86::CRC32r64r64, X86::CRC32r64m64, 0 },
{ X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
{ X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
{ X86::DIVSDrr, X86::DIVSDrm, 0 },
@@ -1201,6 +1212,25 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 },
{ X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
+ // 3DNow! versions of foldable instructions
+ { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 },
+ { X86::PFACCrr, X86::PFACCrm, 0 },
+ { X86::PFADDrr, X86::PFADDrm, 0 },
+ { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 },
+ { X86::PFCMPGErr, X86::PFCMPGErm, 0 },
+ { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 },
+ { X86::PFMAXrr, X86::PFMAXrm, 0 },
+ { X86::PFMINrr, X86::PFMINrm, 0 },
+ { X86::PFMULrr, X86::PFMULrm, 0 },
+ { X86::PFNACCrr, X86::PFNACCrm, 0 },
+ { X86::PFPNACCrr, X86::PFPNACCrm, 0 },
+ { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 },
+ { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 },
+ { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 },
+ { X86::PFSUBrr, X86::PFSUBrm, 0 },
+ { X86::PFSUBRrr, X86::PFSUBRrm, 0 },
+ { X86::PMULHRWrr, X86::PMULHRWrm, 0 },
+
// AVX 128-bit versions of foldable instructions
{ X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
{ X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 },
@@ -5969,6 +5999,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
{ X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
{ X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
+ { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
{ X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
{ X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
{ X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
@@ -5984,6 +6015,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
{ X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
{ X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ // TODO: Add the AVX versions of MOVLPSmr
{ X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
{ X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
{ X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ccdbf0e..65b155c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -643,9 +643,6 @@ let Predicates = [UseAVX] in {
// Represent the same patterns above but in the form they appear for
// 256-bit types
- def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
@@ -653,9 +650,6 @@ let Predicates = [UseAVX] in {
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}
- def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -793,7 +787,7 @@ let Predicates = [UseSSE2] in {
(MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
- // is during lowering, where it's not possible to recognize the fold cause
+ // is during lowering, where it's not possible to recognize the fold because
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
@@ -3678,13 +3672,30 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStore]
+let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+}
+
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
- (VMOVNTPSmr addr:$dst, VR128:$src)>;
+ (VMOVNTDQmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>;
}
def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
- (MOVNTPSmr addr:$dst, VR128:$src)>;
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
+def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
+def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
} // AddedComplexity
@@ -4890,7 +4901,8 @@ let Predicates = [UseAVX] in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIrr GR32:$src)>;
- // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
+ // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
+ // These instructions also write zeros in the high part of a 256-bit register.
let AddedComplexity = 20 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
@@ -4898,6 +4910,9 @@ let Predicates = [UseAVX] in {
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(VMOVDI2PDIrm addr:$src)>;
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
}
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
@@ -5016,6 +5031,9 @@ let Predicates = [UseAVX], AddedComplexity = 20 in {
(VMOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
(VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrm addr:$src), sub_xmm)>;
}
let Predicates = [UseSSE2], AddedComplexity = 20 in {
@@ -7150,6 +7168,10 @@ let Predicates = [HasAVX2] in {
}
// Patterns
+// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or
+// on targets where they have equal performance. These were changed to use
+// blends because blends have better throughput on SandyBridge and Haswell, but
+// movs[s/d] are 1-2 byte shorter instructions.
let Predicates = [UseAVX] in {
let AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
@@ -7166,8 +7188,10 @@ let Predicates = [UseAVX] in {
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
(VBLENDPSYrri (v8f32 (AVX_SET0)), VR256:$src, (i8 1))>;
- def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
- (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+ (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;
}
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
@@ -7181,14 +7205,19 @@ let Predicates = [UseAVX] in {
(v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
sub_xmm)>;
- // Move low f64 and clear high bits.
- def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
- (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;
-
+ // These will incur an FP/int domain crossing penalty, but it may be the only
+ // way without AVX2. Do not add any complexity because we may be able to match
+ // more optimal patterns defined earlier in this file.
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+ (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
(VBLENDPDYrri (v4i64 (AVX_SET0)), VR256:$src, (i8 1))>;
}
+// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or
+// on targets where they have equal performance. These were changed to use
+// blends because blends have better throughput on SandyBridge and Haswell, but
+// movs[s/d] are 1-2 byte shorter instructions.
let Predicates = [UseSSE41] in {
// With SSE41 we can use blends for these patterns.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
@@ -8341,7 +8370,7 @@ let Predicates = [HasAVX2] in {
def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
(VBROADCASTSDYrr VR128:$src)>;
- // Provide aliases for broadcast from the same regitser class that
+ // Provide aliases for broadcast from the same register class that
// automatically does the extract.
def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
(VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src),
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 42256b2..28a3b7b 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -334,6 +334,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
X86ISD::FMUL_RND),
+ X86_INTRINSIC_DATA(avx512_mask_padd_d_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_q_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_q_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
@@ -358,6 +362,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_d_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
@@ -376,6 +386,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_d_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_q_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::RNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index ca8fc9c..4bfc7f9 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -193,7 +193,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
DAG.getConstant(Offset, AddrVT)),
Src,
DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
+ Align, isVolatile, false,
+ DstPtrInfo.getWithOffset(Offset));
}
// TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
@@ -282,7 +283,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
DAG.getNode(ISD::ADD, dl, SrcVT, Src,
DAG.getConstant(Offset, SrcVT)),
DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, AlwaysInline,
+ Align, isVolatile, AlwaysInline, false,
DstPtrInfo.getWithOffset(Offset),
SrcPtrInfo.getWithOffset(Offset)));
}
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
index 215fe89..36b3b02 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -30,7 +30,7 @@ void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
printInstruction(MI, O);
printAnnotation(O, Annot);
}
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
index 78521fd..6fd2dec 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -32,7 +32,8 @@ public:
static const char *getRegisterName(unsigned RegNo);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
private:
void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index d0a09b2..4a790c8 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -81,12 +81,11 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createXCoreMCInstPrinter(const Target &T,
+static MCInstPrinter *createXCoreMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new XCoreInstPrinter(MAI, MII, MRI);
}
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 6e8a95a..c4e3bb8 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -308,7 +308,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
Constant *GA = ConstantExpr::getBitCast(const_cast<GlobalValue*>(GV), Ty);
Ty = Type::getInt32Ty(*DAG.getContext());
Constant *Idx = ConstantInt::get(Ty, Offset);
- Constant *GAI = ConstantExpr::getGetElementPtr(GA, Idx);
+ Constant *GAI = ConstantExpr::getGetElementPtr(
+ Type::getInt8Ty(*DAG.getContext()), GA, Idx);
SDValue CP = DAG.getConstantPool(GAI, MVT::i32);
return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), CP,
MachinePointerInfo(), false, false, false, 0);
@@ -1422,7 +1423,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
InVals.push_back(FIN);
MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV,
DAG.getConstant(Size, MVT::i32),
- Align, false, false,
+ Align, false, false, false,
MachinePointerInfo(),
MachinePointerInfo()));
} else {
@@ -1833,10 +1834,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
LD->getAlignment() == Alignment &&
!LD->isVolatile() && !LD->isIndexed() &&
Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) {
+ bool isTail = isInTailCallPosition(DAG, ST, Chain);
return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
LD->getBasePtr(),
DAG.getConstant(StoreBits/8, MVT::i32),
- Alignment, false, ST->getPointerInfo(),
+ Alignment, false, isTail, ST->getPointerInfo(),
LD->getPointerInfo());
}
}
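(Annotation, not part of the patch.) These GEP hunks follow a pattern that recurs throughout this commit: GEP construction APIs now take the source element type explicitly instead of deriving it from the pointer operand, as part of the migration toward opaque pointer types. A minimal sketch of the two common forms, assuming nullptr is still accepted during the transition to mean "derive the type from the pointer operand":

    // Illustrative only; names are placeholders.
    Value *V = Builder.CreateInBoundsGEP(GV->getValueType(), GV, Indices);
    Constant *C = ConstantExpr::getGetElementPtr(nullptr, BasePtr, Idxs);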
diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index b4c6a50..9fb63e9 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -82,8 +82,9 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
case Instruction::GetElementPtr: {
SmallVector<Value *,4> CEOpVec(CE->op_begin(), CE->op_end());
ArrayRef<Value *> CEOps(CEOpVec);
- return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(CEOps[0],
- CEOps.slice(1)));
+ return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(
+ cast<GEPOperator>(CE)->getSourceElementType(), CEOps[0],
+ CEOps.slice(1)));
}
case Instruction::Add:
case Instruction::Sub:
@@ -212,7 +213,8 @@ bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) {
SmallVector<Value *, 2> Indices;
Indices.push_back(Constant::getNullValue(Type::getInt64Ty(Ctx)));
Indices.push_back(ThreadID);
- Value *Addr = Builder.CreateInBoundsGEP(NewGV, Indices);
+ Value *Addr =
+ Builder.CreateInBoundsGEP(NewGV->getValueType(), NewGV, Indices);
U->replaceUsesOfWith(GV, Addr);
}
diff --git a/lib/Target/XCore/XCoreTargetStreamer.h b/lib/Target/XCore/XCoreTargetStreamer.h
index 48bf0fa..3563dbc 100644
--- a/lib/Target/XCore/XCoreTargetStreamer.h
+++ b/lib/Target/XCore/XCoreTargetStreamer.h
@@ -16,7 +16,7 @@ namespace llvm {
class XCoreTargetStreamer : public MCTargetStreamer {
public:
XCoreTargetStreamer(MCStreamer &S);
- virtual ~XCoreTargetStreamer();
+ ~XCoreTargetStreamer() override;
virtual void emitCCTopData(StringRef Name) = 0;
virtual void emitCCTopFunction(StringRef Name) = 0;
virtual void emitCCBottomData(StringRef Name) = 0;
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 46480bd..56975ea 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -207,6 +207,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// Make sure that it is local to this module.
if (!F || !F->hasLocalLinkage()) return nullptr;
+ // Don't promote arguments for variadic functions. Adding, removing, or
+ // changing non-pack parameters can change the classification of pack
+ // parameters. Frontends encode that classification at the call site in the
+ // IR, while in the callee the classification is determined dynamically based
+ // on the number of registers consumed so far.
+ if (F->isVarArg()) return nullptr;
+
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument*, 16> PointerArgs;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
@@ -227,12 +234,6 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
isSelfRecursive = true;
}
- // Don't promote arguments for variadic functions. Adding, removing, or
- // changing non-pack parameters can change the classification of pack
- // parameters. Frontends encode that classification at the call site in the
- // IR, while in the callee the classification is determined dynamically based
- // on the number of registers consumed so far.
- if (F->isVarArg()) return nullptr;
const DataLayout &DL = F->getParent()->getDataLayout();
// Check to see which arguments are promotable. If an argument is promotable,
@@ -674,8 +675,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
// not allowed to dereference ->begin() if size() is 0
- Params.push_back(
- GetElementPtrInst::getIndexedType(I->getType(), SI->second));
+ Params.push_back(GetElementPtrInst::getIndexedType(
+ cast<PointerType>(I->getType()->getScalarType())->getElementType(),
+ SI->second));
assert(Params.back());
}
@@ -704,7 +706,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
auto DI = FunctionDIs.find(F);
if (DI != FunctionDIs.end()) {
DISubprogram SP = DI->second;
- SP.replaceFunction(NF);
+ SP->replaceFunction(NF);
// Ensure the map is updated so it can be reused on subsequent argument
// promotions of the same function.
FunctionDIs.erase(DI);
@@ -860,7 +862,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Update the callgraph to know that the callsite has been transformed.
CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
- CalleeNode->replaceCallEdge(Call, New, NF_CGN);
+ CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
if (!Call->use_empty()) {
Call->replaceAllUsesWith(New);
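(Annotation, not part of the patch.) The hoisted comment above is about ABI classification: for a variadic callee, how the `...` arguments are passed can depend on how many fixed parameters precede them, so rewriting the fixed parameter list is unsafe. A hedged illustration with a hypothetical function:

    // If 'cfg' were promoted into several scalar parameters, the register/stack
    // classification of the variadic arguments could differ between the call
    // site (frozen in IR by the frontend) and the rewritten callee.
    int logAll(const Config *cfg, const char *fmt, ...);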
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 4431311..3be23d5 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -73,8 +73,8 @@ namespace {
}
std::string getDescription() const {
- return std::string((IsArg ? "Argument #" : "Return value #"))
- + utostr(Idx) + " of function " + F->getName().str();
+ return (Twine(IsArg ? "Argument #" : "Return value #") + utostr(Idx) +
+ " of function " + F->getName()).str();
}
};
@@ -304,7 +304,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
auto DI = FunctionDIs.find(&Fn);
if (DI != FunctionDIs.end()) {
DISubprogram SP = DI->second;
- SP.replaceFunction(NF);
+ SP->replaceFunction(NF);
// Ensure the map is updated so it can be reused on non-varargs argument
// eliminations of the same function.
FunctionDIs.erase(DI);
@@ -482,7 +482,7 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
return Result;
}
- if (ImmutableCallSite CS = V) {
+ if (auto CS = ImmutableCallSite(V)) {
const Function *F = CS.getCalledFunction();
if (F) {
// Used in a direct call.
@@ -1092,7 +1092,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(F);
if (DI != FunctionDIs.end())
- DI->second.replaceFunction(NF);
+ DI->second->replaceFunction(NF);
// Now that the old function is dead, delete it.
F->eraseFromParent();
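(Annotation, not part of the patch.) The getDescription change above swaps repeated std::string concatenation for a single lazily evaluated Twine. A small usage sketch:

    // Twine concatenation is lazy; the temporaries must stay alive until the
    // result is materialized, so call .str() immediately.
    std::string Desc =
        (Twine("Argument #") + utostr(Idx) + " of function " + F->getName()).str();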
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 20b41fb..b8c4f5d 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -564,6 +564,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
Value *NewPtr = NewGlobals[Val];
+ Type *NewTy = NewGlobals[Val]->getType();
// Form a shorter GEP if needed.
if (GEP->getNumOperands() > 3) {
@@ -572,7 +573,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
Idxs.push_back(NullInt);
for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
Idxs.push_back(CE->getOperand(i));
- NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
+ NewPtr =
+ ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
+ NewTy = GetElementPtrInst::getIndexedType(NewTy, Idxs);
} else {
GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
SmallVector<Value*, 8> Idxs;
@@ -721,8 +724,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
else
break;
if (Idxs.size() == GEPI->getNumOperands()-1)
- Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
- ConstantExpr::getGetElementPtr(NewV, Idxs));
+ Changed |= OptimizeAwayTrappingUsesOfValue(
+ GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
@@ -2338,7 +2341,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
Constant * const IdxList[] = {IdxZero, IdxZero};
- Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
+ Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
@@ -2402,8 +2405,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
i != e; ++i)
GEPOps.push_back(getVal(*i));
InstResult =
- ConstantExpr::getGetElementPtr(P, GEPOps,
- cast<GEPOperator>(GEP)->isInBounds());
+ ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
+ cast<GEPOperator>(GEP)->isInBounds());
DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
<< "\n");
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp
index fe00d92..f3f8529 100644
--- a/lib/Transforms/IPO/LowerBitSets.cpp
+++ b/lib/Transforms/IPO/LowerBitSets.cpp
@@ -349,7 +349,8 @@ void LowerBitSets::allocateByteArrays() {
Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
- Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(ByteArray, Idxs);
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(
+ ByteArrayConst->getType(), ByteArray, Idxs);
// Create an alias instead of RAUW'ing the gep directly. On x86 this ensures
// that the pc-relative displacement is folded into the lea instead of the
@@ -395,16 +396,17 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
}
Constant *ByteArray = BAI->ByteArray;
+ Type *Ty = BAI->ByteArray->getValueType();
if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
// Each use of the byte array uses a different alias. This makes the
// backend less likely to reuse previously computed byte array addresses,
// improving the security of the CFI mechanism based on this pass.
- ByteArray = GlobalAlias::create(
- BAI->ByteArray->getType()->getElementType(), 0,
- GlobalValue::PrivateLinkage, "bits_use", ByteArray, M);
+ ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
+ GlobalValue::PrivateLinkage, "bits_use",
+ ByteArray, M);
}
- Value *ByteAddr = B.CreateGEP(ByteArray, BitOffset);
+ Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
Value *Byte = B.CreateLoad(ByteAddr);
Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
@@ -546,8 +548,8 @@ void LowerBitSets::buildBitSetsFromGlobals(
// Multiply by 2 to account for padding elements.
Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
ConstantInt::get(Int32Ty, I * 2)};
- Constant *CombinedGlobalElemPtr =
- ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs);
+ Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
+ NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
if (LinkerSubsectionsViaSymbols) {
Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
} else {
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index d28d563..502451b 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -59,6 +59,10 @@ static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
+static cl::opt<bool>
+RunFloat2Int("float-to-int", cl::Hidden, cl::init(true),
+ cl::desc("Run the float2int (float demotion) pass"));
+
static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
cl::Hidden,
cl::desc("Run the load combining pass"));
@@ -307,6 +311,9 @@ void PassManagerBuilder::populateModulePassManager(
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (RunFloat2Int)
+ MPM.add(createFloat2IntPass());
+
// Re-rotate loops in all our loop nests. These may have fallout out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form.
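(Annotation, not part of the patch.) Design note: the new float2int pass defaults to on (cl::init(true)) but stays behind a hidden flag, so it can presumably be disabled for triage by passing -float-to-int=false to opt, since cl::opt<bool> accepts explicit true/false values even for hidden options.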
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 816978e..ad7c5a0 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -305,33 +305,29 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
SmallVector<Metadata *, 64> LiveSubprograms;
DenseSet<const MDNode *> VisitedSet;
- for (DICompileUnit DIC : F.compile_units()) {
- assert(DIC.Verify() && "DIC must verify as a DICompileUnit.");
-
+ for (MDCompileUnit *DIC : F.compile_units()) {
// Create our live subprogram list.
- DIArray SPs = DIC.getSubprograms();
+ MDSubprogramArray SPs = DIC->getSubprograms();
bool SubprogramChange = false;
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram DISP(SPs.getElement(i));
- assert(DISP.Verify() && "DISP must verify as a DISubprogram.");
+ for (unsigned i = 0, e = SPs.size(); i != e; ++i) {
+ DISubprogram DISP = SPs[i];
// Make sure we visit each subprogram only once.
if (!VisitedSet.insert(DISP).second)
continue;
// If the function referenced by DISP is not null, the function is live.
- if (DISP.getFunction())
+ if (DISP->getFunction())
LiveSubprograms.push_back(DISP);
else
SubprogramChange = true;
}
// Create our live global variable list.
- DIArray GVs = DIC.getGlobalVariables();
+ MDGlobalVariableArray GVs = DIC->getGlobalVariables();
bool GlobalVariableChange = false;
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
- DIGlobalVariable DIG(GVs.getElement(i));
- assert(DIG.Verify() && "DIG must verify as DIGlobalVariable.");
+ for (unsigned i = 0, e = GVs.size(); i != e; ++i) {
+ DIGlobalVariable DIG = GVs[i];
// Make sure we only visit each global variable only once.
if (!VisitedSet.insert(DIG).second)
@@ -339,7 +335,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
// If the global variable referenced by DIG is not null, the global
// variable is live.
- if (DIG.getGlobal())
+ if (DIG->getVariable())
LiveGlobalVariables.push_back(DIG);
else
GlobalVariableChange = true;
@@ -349,12 +345,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
// subprogram list/global variable list with our new live subprogram/global
// variable list.
if (SubprogramChange) {
- DIC.replaceSubprograms(DIArray(MDNode::get(C, LiveSubprograms)));
+ DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms));
Changed = true;
}
if (GlobalVariableChange) {
- DIC.replaceGlobalVariables(DIArray(MDNode::get(C, LiveGlobalVariables)));
+ DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
Changed = true;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 21243c2..56b6cd3 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -197,12 +197,51 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return nullptr;
}
+static Value *SimplifyX86insertps(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // The immediate permute control byte looks like this:
+ // [3:0] - zero mask for each 32-bit lane
+ // [5:4] - select one 32-bit destination lane
+ // [7:6] - select one 32-bit source lane
+
+ uint8_t Imm = CInt->getZExtValue();
+ uint8_t ZMask = Imm & 0xf;
+ uint8_t DestLane = (Imm >> 4) & 0x3;
+ uint8_t SourceLane = (Imm >> 6) & 0x3;
+
+ // If all zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (ZMask == 0xf)
+ return ZeroVector;
+
+ // TODO: Model this case as two shuffles or a 'logical and' plus shuffle?
+ if (ZMask)
+ return nullptr;
+
+ assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
+
+ // If we're not zeroing anything, this is a single shuffle.
+ // Replace the selected destination lane with the selected source lane.
+ // For all other lanes, pass the first source bits through.
+ int ShuffleMask[4] = { 0, 1, 2, 3 };
+ ShuffleMask[DestLane] = SourceLane + 4;
+
+ return Builder.CreateShuffleVector(II.getArgOperand(0), II.getArgOperand(1),
+ ShuffleMask);
+ }
+ return nullptr;
+}
+
/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
/// source vectors, unless a zero bit is set. If a zero bit is set,
/// then ignore that half of the mask and clear that half of the vector.
static Value *SimplifyX86vperm2(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
- if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
VectorType *VecTy = cast<VectorType>(II.getType());
ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
@@ -415,112 +454,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
- case Intrinsic::uadd_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, II);
- if (OR == OverflowResult::NeverOverflows)
- return CreateOverflowTuple(II, Builder->CreateNUWAdd(LHS, RHS), false);
- if (OR == OverflowResult::AlwaysOverflows)
- return CreateOverflowTuple(II, Builder->CreateAdd(LHS, RHS), true);
- }
- // FALL THROUGH uadd into sadd
+
+ case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
- // Canonicalize constants into the RHS.
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
if (isa<Constant>(II->getArgOperand(0)) &&
!isa<Constant>(II->getArgOperand(1))) {
+ // Canonicalize constants into the RHS.
Value *LHS = II->getArgOperand(0);
II->setArgOperand(0, II->getArgOperand(1));
II->setArgOperand(1, LHS);
return II;
}
+ // fall through
- // X + undef -> undef
- if (isa<UndefValue>(II->getArgOperand(1)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // X + 0 -> {X, false}
- if (RHS->isZero()) {
- return CreateOverflowTuple(II, II->getArgOperand(0), false,
- /*ReUseName*/false);
- }
- }
-
- // We can strength reduce reduce this signed add into a regular add if we
- // can prove that it will never overflow.
- if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedAdd(LHS, RHS, *II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false);
- }
- }
-
- break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- // undef - X -> undef
- // X - undef -> undef
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
- // X - 0 -> {X, false}
- if (ConstRHS->isZero()) {
- return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false);
- }
- }
- if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
- if (WillNotOverflowSignedSub(LHS, RHS, *II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
- }
- } else {
- if (WillNotOverflowUnsignedSub(LHS, RHS, *II)) {
- return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
- }
- }
- break;
- }
- case Intrinsic::umul_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, II);
- if (OR == OverflowResult::NeverOverflows)
- return CreateOverflowTuple(II, Builder->CreateNUWMul(LHS, RHS), false);
- if (OR == OverflowResult::AlwaysOverflows)
- return CreateOverflowTuple(II, Builder->CreateMul(LHS, RHS), true);
- } // FALL THROUGH
- case Intrinsic::smul_with_overflow:
- // Canonicalize constants into the RHS.
- if (isa<Constant>(II->getArgOperand(0)) &&
- !isa<Constant>(II->getArgOperand(1))) {
- Value *LHS = II->getArgOperand(0);
- II->setArgOperand(0, II->getArgOperand(1));
- II->setArgOperand(1, LHS);
- return II;
- }
-
- // X * undef -> undef
- if (isa<UndefValue>(II->getArgOperand(1)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+ OverflowCheckFlavor OCF =
+ IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
+ assert(OCF != OCF_INVALID && "unexpected!");
- if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // X*0 -> {0, false}
- if (RHSI->isZero())
- return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+ Value *OperationResult = nullptr;
+ Constant *OverflowResult = nullptr;
+ if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
+ *II, OperationResult, OverflowResult))
+ return CreateOverflowTuple(II, OperationResult, OverflowResult);
- // X * 1 -> {X, false}
- if (RHSI->equalsInt(1)) {
- return CreateOverflowTuple(II, II->getArgOperand(0), false,
- /*ReUseName*/false);
- }
- }
- if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedMul(LHS, RHS, *II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
- }
- }
break;
+ }
+
case Intrinsic::minnum:
case Intrinsic::maxnum: {
Value *Arg0 = II->getArgOperand(0);
@@ -806,7 +769,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
}
-
+ case Intrinsic::x86_sse41_insertps:
+ if (Value *V = SimplifyX86insertps(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::x86_sse4a_insertqi: {
// insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
// ones undef
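(Annotation, not part of the patch.) A worked example of the immediate decoding in SimplifyX86insertps above, with an illustrative value:

    uint8_t Imm = 0x20;                      // no zero mask
    uint8_t ZMask = Imm & 0xf;               // 0x0
    uint8_t DestLane = (Imm >> 4) & 0x3;     // 2
    uint8_t SourceLane = (Imm >> 6) & 0x3;   // 0
    // Single shuffle: lane 2 of the result takes lane 0 of the second operand.
    int ShuffleMask[4] = {0, 1, 2, 3};
    ShuffleMask[DestLane] = SourceLane + 4;  // {0, 1, 4, 3}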
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index fe544c2..bd79a26 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1450,42 +1450,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
CI.setOperand(0, GEP->getOperand(0));
return &CI;
}
-
- // If the GEP has a single use, and the base pointer is a bitcast, and the
- // GEP computes a constant offset, see if we can convert these three
- // instructions into fewer. This typically happens with unions and other
- // non-type-safe code.
- unsigned AS = GEP->getPointerAddressSpace();
- unsigned OffsetBits = DL.getPointerSizeInBits(AS);
- APInt Offset(OffsetBits, 0);
- BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0));
- if (GEP->hasOneUse() && BCI && GEP->accumulateConstantOffset(DL, Offset)) {
- // FIXME: This is insufficiently tested - just a no-crash test
- // (test/Transforms/InstCombine/2007-05-14-Crash.ll)
- //
- // Get the base pointer input of the bitcast, and the type it points to.
- Value *OrigBase = BCI->getOperand(0);
- SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(OrigBase->getType(), Offset.getSExtValue(),
- NewIndices)) {
- // FIXME: This codepath is completely untested - could be unreachable
- // for all I know.
- // If we were able to index down into an element, create the GEP
- // and bitcast the result. This eliminates one bitcast, potentially
- // two.
- Value *NGEP = cast<GEPOperator>(GEP)->isInBounds()
- ? Builder->CreateInBoundsGEP(OrigBase, NewIndices)
- : Builder->CreateGEP(
- OrigBase->getType()->getPointerElementType(),
- OrigBase, NewIndices);
- NGEP->takeName(GEP);
-
- if (isa<BitCastInst>(CI))
- return new BitCastInst(NGEP, CI.getType());
- assert(isa<PtrToIntInst>(CI));
- return new PtrToIntInst(NGEP, CI.getType());
- }
- }
}
return commonCastTransforms(CI);
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 803b50a..223bba0 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2109,33 +2109,112 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
-static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
- InstCombiner &IC) {
- // Don't bother doing this transformation for pointers, don't do it for
- // vectors.
- if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
+bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
+ Value *RHS, Instruction &OrigI,
+ Value *&Result, Constant *&Overflow) {
+ assert((!OrigI.isCommutative() ||
+ !(isa<Constant>(LHS) && !isa<Constant>(RHS))) &&
+ "call with a constant RHS if possible!");
+
+ auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
+ Result = OpResult;
+ Overflow = OverflowVal;
+ if (ReuseName)
+ Result->takeName(&OrigI);
+ return true;
+ };
- // If the add is a constant expr, then we don't bother transforming it.
- Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
- if (!OrigAdd) return nullptr;
+ switch (OCF) {
+ case OCF_INVALID:
+ llvm_unreachable("bad overflow check kind!");
- Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
+ case OCF_UNSIGNED_ADD: {
+ OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
+ if (OR == OverflowResult::NeverOverflows)
+ return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(),
+ true);
- // Put the new code above the original add, in case there are any uses of the
- // add between the add and the compare.
- InstCombiner::BuilderTy *Builder = IC.Builder;
- Builder->SetInsertPoint(OrigAdd);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true);
+ }
+ // FALL THROUGH uadd into sadd
+ case OCF_SIGNED_ADD: {
+ // X + undef -> undef
+ if (isa<UndefValue>(RHS))
+ return SetResult(UndefValue::get(RHS->getType()),
+ UndefValue::get(Builder->getInt1Ty()), false);
+
+ if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS))
+ // X + 0 -> {X, false}
+ if (ConstRHS->isZero())
+ return SetResult(LHS, Builder->getFalse(), false);
+
+ // We can strength reduce this signed add into a regular add if we can prove
+ // that it will never overflow.
+ if (OCF == OCF_SIGNED_ADD)
+ if (WillNotOverflowSignedAdd(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(),
+ true);
+ }
- Module *M = I.getParent()->getParent()->getParent();
- Type *Ty = LHS->getType();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
- CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
- Value *Add = Builder->CreateExtractValue(Call, 0);
+ case OCF_UNSIGNED_SUB:
+ case OCF_SIGNED_SUB: {
+ // undef - X -> undef
+ // X - undef -> undef
+ if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
+ return SetResult(UndefValue::get(LHS->getType()),
+ UndefValue::get(Builder->getInt1Ty()), false);
+
+ if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS))
+ // X - 0 -> {X, false}
+ if (ConstRHS->isZero())
+ return SetResult(UndefValue::get(LHS->getType()), Builder->getFalse(),
+ false);
+
+ if (OCF == OCF_SIGNED_SUB) {
+ if (WillNotOverflowSignedSub(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(),
+ true);
+ } else {
+ if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(),
+ true);
+ }
+ break;
+ }
- IC.ReplaceInstUsesWith(*OrigAdd, Add);
+ case OCF_UNSIGNED_MUL: {
+ OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
+ if (OR == OverflowResult::NeverOverflows)
+ return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(),
+ true);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true);
+ } // FALL THROUGH
+ case OCF_SIGNED_MUL:
+ // X * undef -> undef
+ if (isa<UndefValue>(RHS))
+ return SetResult(UndefValue::get(LHS->getType()),
+ UndefValue::get(Builder->getInt1Ty()), false);
+
+ if (ConstantInt *RHSI = dyn_cast<ConstantInt>(RHS)) {
+ // X * 0 -> {0, false}
+ if (RHSI->isZero())
+ return SetResult(Constant::getNullValue(RHS->getType()),
+ Builder->getFalse(), false);
+
+ // X * 1 -> {X, false}
+ if (RHSI->equalsInt(1))
+ return SetResult(LHS, Builder->getFalse(), false);
+ }
- // The original icmp gets replaced with the overflow value.
- return ExtractValueInst::Create(Call, 1, "uadd.overflow");
+ if (OCF == OCF_SIGNED_MUL)
+ if (WillNotOverflowSignedMul(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(),
+ true);
+ }
+
+ return false;
}
/// \brief Recognize and process idiom involving test for multiplication
@@ -3432,21 +3511,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
}
- // (a+b) <u a --> llvm.uadd.with.overflow.
- // (a+b) <u b --> llvm.uadd.with.overflow.
- if (I.getPredicate() == ICmpInst::ICMP_ULT &&
- match(Op0, m_Add(m_Value(A), m_Value(B))) &&
- (Op1 == A || Op1 == B))
- if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
- return R;
-
- // a >u (a+b) --> llvm.uadd.with.overflow.
- // b >u (a+b) --> llvm.uadd.with.overflow.
- if (I.getPredicate() == ICmpInst::ICMP_UGT &&
- match(Op1, m_Add(m_Value(A), m_Value(B))) &&
- (Op0 == A || Op0 == B))
- if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
- return R;
+ Instruction *AddI = nullptr;
+ if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+ m_Instruction(AddI))) &&
+ isa<IntegerType>(A->getType())) {
+ Value *Result;
+ Constant *Overflow;
+ if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
+ Overflow)) {
+ ReplaceInstUsesWith(*AddI, Result);
+ return ReplaceInstUsesWith(I, Overflow);
+ }
+ }
// (zext a) * (zext b) --> llvm.umul.with.overflow.
if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
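(Annotation, not part of the patch.) The m_UAddWithOverflow match above generalizes the two removed special cases; the underlying idiom is the usual unsigned wraparound check, sketched here in plain C++:

    // For unsigned arithmetic, a + b wraps exactly when the sum compares below
    // either operand, which is what the removed (a+b) <u a / <u b patterns and
    // the new matcher both recognize.
    bool addOverflows(unsigned a, unsigned b) {
      unsigned sum = a + b;
      return sum < a;
    }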
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index fb2321d..d0caf34 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -110,6 +110,41 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
return false;
}
+
+/// \brief Specific patterns of overflow check idioms that we match.
+enum OverflowCheckFlavor {
+ OCF_UNSIGNED_ADD,
+ OCF_SIGNED_ADD,
+ OCF_UNSIGNED_SUB,
+ OCF_SIGNED_SUB,
+ OCF_UNSIGNED_MUL,
+ OCF_SIGNED_MUL,
+
+ OCF_INVALID
+};
+
+/// \brief Returns the OverflowCheckFlavor corresponding to a overflow_with_op
+/// intrinsic.
+static inline OverflowCheckFlavor
+IntrinsicIDToOverflowCheckFlavor(unsigned ID) {
+ switch (ID) {
+ default:
+ return OCF_INVALID;
+ case Intrinsic::uadd_with_overflow:
+ return OCF_UNSIGNED_ADD;
+ case Intrinsic::sadd_with_overflow:
+ return OCF_SIGNED_ADD;
+ case Intrinsic::usub_with_overflow:
+ return OCF_UNSIGNED_SUB;
+ case Intrinsic::ssub_with_overflow:
+ return OCF_SIGNED_SUB;
+ case Intrinsic::umul_with_overflow:
+ return OCF_UNSIGNED_MUL;
+ case Intrinsic::smul_with_overflow:
+ return OCF_SIGNED_MUL;
+ }
+}
+
/// \brief An IRBuilder inserter that adds new instructions to the instcombine
/// worklist.
class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
@@ -316,7 +351,7 @@ private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const;
- Type *FindElementAtOffset(Type *PtrTy, int64_t Offset,
+ Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
SmallVectorImpl<Value *> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
@@ -329,6 +364,17 @@ private:
bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V,
Type *Ty);
+ /// \brief Try to optimize a sequence of instructions checking if an operation
+ /// on LHS and RHS overflows.
+ ///
+ /// If a simplification is possible, stores the simplified result of the
+ /// operation in OperationResult and result of the overflow check in
+ /// OverflowResult, and return true. If no simplification is possible,
+ /// returns false.
+ bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS,
+ Instruction &CtxI, Value *&OperationResult,
+ Constant *&OverflowResult);
+
Instruction *visitCallSite(CallSite CS);
Instruction *tryOptimizeCall(CallInst *CI);
bool transformConstExprCastCall(CallSite CS);
@@ -391,14 +437,10 @@ public:
}
/// Creates a result tuple for an overflow intrinsic \p II with a given
- /// \p Result and a constant \p Overflow value. If \p ReUseName is true the
- /// \p Result's name is taken from \p II.
+ /// \p Result and a constant \p Overflow value.
Instruction *CreateOverflowTuple(IntrinsicInst *II, Value *Result,
- bool Overflow, bool ReUseName = true) {
- if (ReUseName)
- Result->takeName(II);
- Constant *V[] = {UndefValue::get(Result->getType()),
- Overflow ? Builder->getTrue() : Builder->getFalse()};
+ Constant *Overflow) {
+ Constant *V[] = {UndefValue::get(Result->getType()), Overflow};
StructType *ST = cast<StructType>(II->getType());
Constant *Struct = ConstantStruct::get(ST, V);
return InsertValueInst::Create(Struct, Result, 0);
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 6b0f268..d8a559c 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -84,7 +84,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
continue;
}
- if (CallSite CS = I) {
+ if (auto CS = CallSite(I)) {
// If this is the function being called then we treat it like a load and
// ignore it.
if (CS.isCallee(&U))
@@ -611,8 +611,10 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
return false;
SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
- Type *AllocTy =
- GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops);
+ Type *AllocTy = GetElementPtrInst::getIndexedType(
+ cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType())
+ ->getElementType(),
+ Ops);
if (!AllocTy || !AllocTy->isSized())
return false;
const DataLayout &DL = IC.getDataLayout();
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index b6beb65..24446c8 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -987,8 +987,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
unsigned BegIdx = Mask.front();
VectorType *SrcTy = cast<VectorType>(V->getType());
unsigned VecBitWidth = SrcTy->getBitWidth();
- unsigned SrcElemBitWidth =
- SrcTy->getElementType()->getPrimitiveSizeInBits();
+ unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
assert(SrcElemBitWidth && "vector elements must have a bitwidth");
unsigned SrcNumElems = SrcTy->getNumElements();
SmallVector<BitCastInst *, 8> BCs;
@@ -1000,7 +999,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
BCs.push_back(BC);
for (BitCastInst *BC : BCs) {
Type *TgtTy = BC->getDestTy();
- unsigned TgtElemBitWidth = TgtTy->getPrimitiveSizeInBits();
+ unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
if (!TgtElemBitWidth)
continue;
unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 90551e4..3b46156 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -869,11 +869,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
/// whether or not there is a sequence of GEP indices into the pointed type that
/// will land us at the specified offset. If so, fill them into NewIndices and
/// return the resultant element type, otherwise return null.
-Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
+Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
SmallVectorImpl<Value *> &NewIndices) {
- assert(PtrTy->isPtrOrPtrVectorTy());
-
- Type *Ty = PtrTy->getPointerElementType();
+ Type *Ty = PtrTy->getElementType();
if (!Ty->isSized())
return nullptr;
@@ -1611,12 +1609,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %0 = GEP [10 x i8] addrspace(1)* X, ...
// addrspacecast i8 addrspace(1)* %0 to i8*
SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
- Value *NewGEP =
- GEP.isInBounds()
- ? Builder->CreateInBoundsGEP(StrippedPtr, Idx,
- GEP.getName())
- : Builder->CreateGEP(StrippedPtrTy->getElementType(),
- StrippedPtr, Idx, GEP.getName());
+ Value *NewGEP = GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(
+ nullptr, StrippedPtr, Idx, GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, Idx,
+ GEP.getName());
return new AddrSpaceCastInst(NewGEP, GEP.getType());
}
}
@@ -1634,9 +1631,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
Value *NewGEP =
GEP.isInBounds()
- ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName())
- : Builder->CreateGEP(StrippedPtrTy->getElementType(),
- StrippedPtr, Idx, GEP.getName());
+ ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx,
+ GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());
// V and GEP are both pointer types --> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1669,10 +1666,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// GEP may not be "inbounds".
Value *NewGEP =
GEP.isInBounds() && NSW
- ? Builder->CreateInBoundsGEP(StrippedPtr, NewIdx,
+ ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx,
GEP.getName())
- : Builder->CreateGEP(StrippedPtrTy->getElementType(),
- StrippedPtr, NewIdx, GEP.getName());
+ : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx,
+ GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1710,9 +1707,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Constant::getNullValue(DL.getIntPtrType(GEP.getType())),
NewIdx};
- Value *NewGEP = GEP.isInBounds() && NSW ?
- Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
- Builder->CreateGEP(SrcElTy, StrippedPtr, Off, GEP.getName());
+ Value *NewGEP = GEP.isInBounds() && NSW
+ ? Builder->CreateInBoundsGEP(
+ SrcElTy, StrippedPtr, Off, GEP.getName())
+ : Builder->CreateGEP(SrcElTy, StrippedPtr, Off,
+ GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
GEP.getType());
@@ -1774,9 +1773,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// GEP.
SmallVector<Value*, 8> NewIndices;
if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
- Value *NGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(Operand, NewIndices) :
- Builder->CreateGEP(OpType->getElementType(), Operand, NewIndices);
+ Value *NGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices)
+ : Builder->CreateGEP(nullptr, Operand, NewIndices);
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
@@ -2268,7 +2268,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// We need to insert these at the location of the old load, not at that of
// the extractvalue.
Builder->SetInsertPoint(L->getParent(), L);
- Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices);
+ Value *GEP = Builder->CreateInBoundsGEP(L->getType(),
+ L->getPointerOperand(), Indices);
// Returning the load directly will cause the main loop to insert it in
// the wrong spot, so use ReplaceInstUsesWith().
return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP));
@@ -2725,7 +2726,7 @@ bool InstCombiner::run() {
DEBUG(dbgs() << "IC: Old = " << *I << '\n'
<< " New = " << *Result << '\n');
- if (!I->getDebugLoc().isUnknown())
+ if (I->getDebugLoc())
Result->setDebugLoc(I->getDebugLoc());
// Everything uses the new instruction now.
I->replaceAllUsesWith(Result);
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 978c857..8d6d3ce 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -401,12 +401,12 @@ struct AddressSanitizer : public FunctionPass {
return SizeInBytes;
}
/// Check if we want (and can) handle this alloca.
- bool isInterestingAlloca(AllocaInst &AI) const;
+ bool isInterestingAlloca(AllocaInst &AI);
/// If it is an interesting memory access, return the PointerOperand
/// and set IsWrite/Alignment. Otherwise return nullptr.
Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
uint64_t *TypeSize,
- unsigned *Alignment) const;
+ unsigned *Alignment);
void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I,
bool UseCalls, const DataLayout &DL);
void instrumentPointerComparisonOrSubtraction(Instruction *I);
@@ -458,6 +458,7 @@ struct AddressSanitizer : public FunctionPass {
Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
InlineAsm *EmptyAsm;
GlobalsMetadata GlobalsMD;
+ DenseMap<AllocaInst *, bool> ProcessedAllocas;
friend struct FunctionStackPoisoner;
};
@@ -804,13 +805,21 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
}
/// Check if we want (and can) handle this alloca.
-bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const {
- return (AI.getAllocatedType()->isSized() &&
- // alloca() may be called with 0 size, ignore it.
- getAllocaSizeInBytes(&AI) > 0 &&
- // We are only interested in allocas not promotable to registers.
- // Promotable allocas are common under -O0.
- (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)));
+bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) {
+ auto PreviouslySeenAllocaInfo = ProcessedAllocas.find(&AI);
+
+ if (PreviouslySeenAllocaInfo != ProcessedAllocas.end())
+ return PreviouslySeenAllocaInfo->getSecond();
+
+ bool IsInteresting = (AI.getAllocatedType()->isSized() &&
+ // alloca() may be called with 0 size, ignore it.
+ getAllocaSizeInBytes(&AI) > 0 &&
+ // We are only interested in allocas not promotable to registers.
+ // Promotable allocas are common under -O0.
+ (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)));
+
+ ProcessedAllocas[&AI] = IsInteresting;
+ return IsInteresting;
}
/// If I is an interesting memory access, return the PointerOperand
@@ -818,7 +827,7 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const {
Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
bool *IsWrite,
uint64_t *TypeSize,
- unsigned *Alignment) const {
+ unsigned *Alignment) {
// Skip memory accesses inserted by another instrumentation.
if (I->getMetadata("nosanitize")) return nullptr;
@@ -959,18 +968,6 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
UseCalls, Exp);
}
-// Validate the result of Module::getOrInsertFunction called for an interface
-// function of AddressSanitizer. If the instrumented module defines a function
-// with the same name, their prototypes must match, otherwise
-// getOrInsertFunction returns a bitcast.
-static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
- if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast);
- FuncOrBitcast->dump();
- report_fatal_error(
- "trying to redefine an AddressSanitizer "
- "interface function");
-}
-
Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore,
Value *Addr, bool IsWrite,
size_t AccessSizeIndex,
@@ -1056,7 +1053,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
// path is rarely taken. This seems to be the case for SPEC benchmarks.
TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen(
Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000));
- assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional());
+ assert(cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
@@ -1219,17 +1216,17 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
void AddressSanitizerModule::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
// Declare our poisoning and unpoisoning functions.
- AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));
AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
- AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanUnpoisonGlobalsName, IRB.getVoidTy(), nullptr));
AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
// Declare functions that register/unregister globals.
- AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
- AsanUnregisterGlobals = checkInterfaceFunction(
+ AsanUnregisterGlobals = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(),
IntptrTy, IntptrTy, nullptr));
AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
@@ -1317,7 +1314,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
Indices2[1] = IRB.getInt32(0);
G->replaceAllUsesWith(
- ConstantExpr::getGetElementPtr(NewGlobal, Indices2, true));
+ ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true));
NewGlobal->takeName(G);
G->eraseFromParent();
@@ -1399,44 +1396,44 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
const std::string ExpStr = Exp ? "exp_" : "";
const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;
AsanErrorCallbackSized[AccessIsWrite][Exp] =
- checkInterfaceFunction(M.getOrInsertFunction(
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanReportErrorTemplate + ExpStr + TypeStr + "_n",
IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
- checkInterfaceFunction(M.getOrInsertFunction(
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N",
IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex);
AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
- checkInterfaceFunction(M.getOrInsertFunction(
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanReportErrorTemplate + ExpStr + Suffix, IRB.getVoidTy(),
IntptrTy, ExpType, nullptr));
AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
- checkInterfaceFunction(M.getOrInsertFunction(
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + ExpStr + Suffix, IRB.getVoidTy(),
IntptrTy, ExpType, nullptr));
}
}
}
- AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr));
- AsanMemcpy = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr));
- AsanMemset = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr));
- AsanHandleNoReturnFunc = checkInterfaceFunction(
+ AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), nullptr));
- AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
- AsanPtrSubFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanPtrSubFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
@@ -1461,7 +1458,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
// call __asan_init in the module ctor.
IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB));
- AsanInitFunction = checkInterfaceFunction(
+ AsanInitFunction = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), nullptr));
AsanInitFunction->setLinkage(Function::ExternalLinkage);
IRB.CreateCall(AsanInitFunction);
@@ -1613,16 +1610,17 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) {
std::string Suffix = itostr(i);
- AsanStackMallocFunc[i] = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy, nullptr));
- AsanStackFreeFunc[i] = checkInterfaceFunction(
+ AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy,
+ IntptrTy, nullptr));
+ AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix,
IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
}
- AsanPoisonStackMemoryFunc = checkInterfaceFunction(
+ AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(),
IntptrTy, IntptrTy, nullptr));
- AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(
+ AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(),
IntptrTy, IntptrTy, nullptr));
}
@@ -1757,9 +1755,11 @@ void FunctionStackPoisoner::poisonStack() {
uint64_t LocalStackSize = L.FrameSize;
bool DoStackMalloc =
ClUseAfterReturn && LocalStackSize <= kMaxStackMallocSize;
- // Don't do dynamic alloca in presence of inline asm: too often it
- // makes assumptions on which registers are available.
+ // Don't do dynamic alloca in presence of inline asm: too often it makes
+ // assumptions on which registers are available. Don't do stack malloc in the
+ // presence of inline asm on 32-bit platforms for the same reason.
bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm;
+ DoStackMalloc &= !HasNonEmptyInlineAsm || ASan.LongSize != 32;
Value *StaticAlloca =
DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false);
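(Annotation, not part of the patch.) isInterestingAlloca is now memoized in ProcessedAllocas, which is why it lost its const qualifier. The caching pattern, sketched with a hypothetical helper and assuming the answer for a given alloca is stable for the lifetime of the pass:

    bool cachedIsInteresting(AllocaInst &AI, DenseMap<AllocaInst *, bool> &Cache) {
      auto It = Cache.find(&AI);
      if (It != Cache.end())
        return It->second;                    // reuse the previous answer
      bool Result = computeIsInteresting(AI); // hypothetical uncached predicate
      Cache[&AI] = Result;
      return Result;
    }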
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index b3925ee..06d5aed 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -753,7 +753,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(&F);
if (DI != FunctionDIs.end())
- DI->second.replaceFunction(&F);
+ DI->second->replaceFunction(&F);
UnwrappedFnMap[WrappedFnCst] = &F;
*i = NewF;
@@ -1075,8 +1075,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
}
case 2: {
IRBuilder<> IRB(Pos);
- Value *ShadowAddr1 =
- IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1));
+ Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr,
+ ConstantInt::get(DFS.IntptrTy, 1));
return combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign),
IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), Pos);
}
@@ -1127,7 +1127,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
DT.addNewBlock(NextBB, LastBr->getParent());
IRBuilder<> NextIRB(NextBB);
- WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1));
+ WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
+ ConstantInt::get(DFS.IntptrTy, 1));
Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign);
ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
LastBr->setSuccessor(0, NextBB);
@@ -1213,7 +1214,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
Value *ShadowVecAddr =
IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
do {
- Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecAddr, Offset);
+ Value *CurShadowVecAddr =
+ IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
Size -= ShadowVecSize;
++Offset;
@@ -1221,7 +1223,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
Offset *= ShadowVecSize;
}
while (Size > 0) {
- Value *CurShadowAddr = IRB.CreateConstGEP1_32(ShadowAddr, Offset);
+ Value *CurShadowAddr =
+ IRB.CreateConstGEP1_32(DFS.ShadowTy, ShadowAddr, Offset);
IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign);
--Size;
++Offset;
@@ -1469,17 +1472,17 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
Args.push_back(DFSF.getShadow(*i));
if (FT->isVarArg()) {
- auto LabelVAAlloca =
- new AllocaInst(ArrayType::get(DFSF.DFS.ShadowTy,
- CS.arg_size() - FT->getNumParams()),
- "labelva", DFSF.F->getEntryBlock().begin());
+ auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy,
+ CS.arg_size() - FT->getNumParams());
+ auto *LabelVAAlloca = new AllocaInst(LabelVATy, "labelva",
+ DFSF.F->getEntryBlock().begin());
for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) {
- auto LabelVAPtr = IRB.CreateStructGEP(LabelVAAlloca, n);
+ auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);
IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr);
}
- Args.push_back(IRB.CreateStructGEP(LabelVAAlloca, 0));
+ Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
}
if (!FT->getReturnType()->isVoidTy()) {
@@ -1565,10 +1568,11 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
AllocaInst *VarArgShadow =
new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin());
- Args.push_back(IRB.CreateConstGEP2_32(VarArgShadow, 0, 0));
+ Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
for (unsigned n = 0; i != e; ++i, ++n) {
- IRB.CreateStore(DFSF.getShadow(*i),
- IRB.CreateConstGEP2_32(VarArgShadow, 0, n));
+ IRB.CreateStore(
+ DFSF.getShadow(*i),
+ IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n));
Args.push_back(*i);
}
}
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index a793e69..368a81d 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -149,10 +149,10 @@ ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
return new GCOVProfiler(Options);
}
-static StringRef getFunctionName(DISubprogram SP) {
- if (!SP.getLinkageName().empty())
- return SP.getLinkageName();
- return SP.getName();
+static StringRef getFunctionName(MDSubprogram *SP) {
+ if (!SP->getLinkageName().empty())
+ return SP->getLinkageName();
+ return SP->getName();
}
namespace {
@@ -163,7 +163,7 @@ namespace {
static const char *const BlockTag;
static const char *const EdgeTag;
- GCOVRecord() {}
+ GCOVRecord() = default;
void writeBytes(const char *Bytes, int Size) {
os->write(Bytes, Size);
@@ -315,7 +315,7 @@ namespace {
ReturnBlock(1, os) {
this->os = os;
- Function *F = SP.getFunction();
+ Function *F = SP->getFunction();
DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
uint32_t i = 0;
@@ -330,7 +330,7 @@ namespace {
std::string FunctionNameAndLine;
raw_string_ostream FNLOS(FunctionNameAndLine);
- FNLOS << getFunctionName(SP) << SP.getLineNumber();
+ FNLOS << getFunctionName(SP) << SP->getLine();
FNLOS.flush();
FuncChecksum = hash_value(FunctionNameAndLine);
}
@@ -366,7 +366,7 @@ namespace {
void writeOut() {
writeBytes(FunctionTag, 4);
uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
- 1 + lengthOfGCOVString(SP.getFilename()) + 1;
+ 1 + lengthOfGCOVString(SP->getFilename()) + 1;
if (UseCfgChecksum)
++BlockLen;
write(BlockLen);
@@ -375,8 +375,8 @@ namespace {
if (UseCfgChecksum)
write(CfgChecksum);
writeGCOVString(getFunctionName(SP));
- writeGCOVString(SP.getFilename());
- write(SP.getLineNumber());
+ writeGCOVString(SP->getFilename());
+ write(SP->getLine());
// Emit count of blocks.
writeBytes(BlockTag, 4);
@@ -437,12 +437,12 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
}
}
- SmallString<128> Filename = CU.getFilename();
+ SmallString<128> Filename = CU->getFilename();
sys::path::replace_extension(Filename, NewStem);
StringRef FName = sys::path::filename(Filename);
SmallString<128> CurPath;
if (sys::fs::current_path(CurPath)) return FName;
- sys::path::append(CurPath, FName.str());
+ sys::path::append(CurPath, FName);
return CurPath.str();
}
@@ -466,7 +466,8 @@ static bool functionHasLines(Function *F) {
if (isa<DbgInfoIntrinsic>(I)) continue;
const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
+ if (!Loc)
+ continue;
// Artificial lines such as calls to the global constructors.
if (Loc.getLine() == 0) continue;
@@ -486,21 +487,14 @@ void GCOVProfiler::emitProfileNotes() {
// this pass over the original .o's as they're produced, or run it after
// LTO, we'll generate the same .gcno files.
- DICompileUnit CU(CU_Nodes->getOperand(i));
+ DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i));
std::error_code EC;
raw_fd_ostream out(mangleName(CU, "gcno"), EC, sys::fs::F_None);
std::string EdgeDestinations;
- DIArray SPs = CU.getSubprograms();
unsigned FunctionIdent = 0;
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- assert((!SP || SP.isSubprogram()) &&
- "A MDNode in subprograms of a CU should be null or a DISubprogram.");
- if (!SP)
- continue;
-
- Function *F = SP.getFunction();
+ for (auto *SP : CU->getSubprograms()) {
+ Function *F = SP->getFunction();
if (!F) continue;
if (!functionHasLines(F)) continue;
@@ -536,16 +530,18 @@ void GCOVProfiler::emitProfileNotes() {
if (isa<DbgInfoIntrinsic>(I)) continue;
const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
+ if (!Loc)
+ continue;
// Artificial lines such as calls to the global constructors.
if (Loc.getLine() == 0) continue;
if (Line == Loc.getLine()) continue;
Line = Loc.getLine();
- if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
+ if (SP != getDISubprogram(Loc.getScope()))
+ continue;
- GCOVLines &Lines = Block.getFile(SP.getFilename());
+ GCOVLines &Lines = Block.getFile(SP->getFilename());
Lines.addLine(Loc.getLine());
}
}
@@ -574,16 +570,10 @@ bool GCOVProfiler::emitProfileArcs() {
bool Result = false;
bool InsertIndCounterIncrCode = false;
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit CU(CU_Nodes->getOperand(i));
- DIArray SPs = CU.getSubprograms();
+ DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i));
SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- assert((!SP || SP.isSubprogram()) &&
- "A MDNode in subprograms of a CU should be null or a DISubprogram.");
- if (!SP)
- continue;
- Function *F = SP.getFunction();
+ for (auto *SP : CU->getSubprograms()) {
+ Function *F = SP->getFunction();
if (!F) continue;
if (!functionHasLines(F)) continue;
if (!Result) Result = true;
@@ -603,7 +593,7 @@ bool GCOVProfiler::emitProfileArcs() {
GlobalValue::InternalLinkage,
Constant::getNullValue(CounterTy),
"__llvm_gcov_ctr");
- CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
+ CountersBySP.push_back(std::make_pair(Counters, SP));
UniqueVector<BasicBlock *> ComplexEdgePreds;
UniqueVector<BasicBlock *> ComplexEdgeSuccs;
@@ -628,7 +618,8 @@ bool GCOVProfiler::emitProfileArcs() {
SmallVector<Value *, 2> Idx;
Idx.push_back(Builder.getInt64(0));
Idx.push_back(Sel);
- Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx);
+ Value *Counter = Builder.CreateInBoundsGEP(Counters->getValueType(),
+ Counters, Idx);
Value *Count = Builder.CreateLoad(Counter);
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
@@ -855,7 +846,7 @@ Function *GCOVProfiler::insertCounterWriteout(
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (CU_Nodes) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit CU(CU_Nodes->getOperand(i));
+ DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i));
std::string FilenameGcda = mangleName(CU, "gcda");
uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
Builder.CreateCall3(StartFile,
@@ -863,7 +854,7 @@ Function *GCOVProfiler::insertCounterWriteout(
Builder.CreateGlobalStringPtr(ReversedVersion),
Builder.getInt32(CfgChecksum));
for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) {
- DISubprogram SP(CountersBySP[j].second);
+ auto *SP = cast_or_null<MDSubprogram>(CountersBySP[j].second);
uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
Builder.CreateCall5(
EmitFunction, Builder.getInt32(j),
@@ -922,7 +913,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());
Arg = std::next(Fn->arg_begin());
Arg->setName("counters");
- Value *GEP = Builder.CreateGEP(Arg, ZExtPred);
+ Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred);
Value *Counter = Builder.CreateLoad(GEP, "counter");
Cond = Builder.CreateICmpEQ(Counter,
Constant::getNullValue(
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index c2aa1e2..2b35066 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -623,8 +623,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *IntptrOriginPtr =
IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
for (unsigned i = 0; i < Size / IntptrSize; ++i) {
- Value *Ptr =
- i ? IRB.CreateConstGEP1_32(IntptrOriginPtr, i) : IntptrOriginPtr;
+ Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
+ : IntptrOriginPtr;
IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
Ofs += IntptrSize / kOriginSize;
CurrentAlignment = IntptrAlignment;
@@ -632,7 +632,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
- Value *GEP = i ? IRB.CreateConstGEP1_32(OriginPtr, i) : OriginPtr;
+ Value *GEP =
+ i ? IRB.CreateConstGEP1_32(nullptr, OriginPtr, i) : OriginPtr;
IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
CurrentAlignment = kMinOriginAlignment;
}
@@ -2843,7 +2844,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
Value *OverflowArgAreaShadowPtr =
MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB);
- Value *SrcPtr = IRB.CreateConstGEP1_32(VAArgTLSCopy, AMD64FpEndOffset);
+ Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
+ AMD64FpEndOffset);
IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);
}
}
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 289675e..662513d 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -140,16 +140,6 @@ class SanitizerCoverageModule : public ModulePass {
} // namespace
-static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
- if (Function *F = dyn_cast<Function>(FuncOrBitcast))
- return F;
- std::string Err;
- raw_string_ostream Stream(Err);
- Stream << "SanitizerCoverage interface function redefined: "
- << *FuncOrBitcast;
- report_fatal_error(Err);
-}
-
bool SanitizerCoverageModule::runOnModule(Module &M) {
if (!CoverageLevel) return false;
C = &(M.getContext());
@@ -167,16 +157,18 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
ReturnInst::Create(*C, BasicBlock::Create(*C, "", CtorFunc));
appendToGlobalCtors(M, CtorFunc, kSanCtorAndDtorPriority);
- SanCovFunction = checkInterfaceFunction(
+ SanCovFunction = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kSanCovName, VoidTy, Int32PtrTy, nullptr));
- SanCovWithCheckFunction = checkInterfaceFunction(
+ SanCovWithCheckFunction = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kSanCovWithCheckName, VoidTy, Int32PtrTy, nullptr));
- SanCovIndirCallFunction = checkInterfaceFunction(M.getOrInsertFunction(
- kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr));
- SanCovTraceCmpFunction = checkInterfaceFunction(M.getOrInsertFunction(
- kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr));
-
- SanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction(
+ SanCovIndirCallFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr));
+ SanCovTraceCmpFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr));
+
+ SanCovModuleInit = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kSanCovModuleInitName, Type::getVoidTy(*C), Int32PtrTy, IntptrTy,
Int8PtrTy, Int8PtrTy, nullptr));
SanCovModuleInit->setLinkage(Function::ExternalLinkage);
@@ -186,9 +178,9 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
/*hasSideEffects=*/true);
if (ClExperimentalTracing) {
- SanCovTraceEnter = checkInterfaceFunction(
+ SanCovTraceEnter = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr));
- SanCovTraceBB = checkInterfaceFunction(
+ SanCovTraceBB = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr));
}
@@ -316,7 +308,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
IRBuilder<> IRB(I);
CallSite CS(I);
Value *Callee = CS.getCalledValue();
- if (dyn_cast<InlineAsm>(Callee)) continue;
+ if (isa<InlineAsm>(Callee)) continue;
GlobalVariable *CalleeCache = new GlobalVariable(
*F.getParent(), Ty, false, GlobalValue::PrivateLinkage,
Constant::getNullValue(Ty), "__sancov_gen_callee_cache");
@@ -366,8 +358,8 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
}
bool IsEntryBB = &BB == &F.getEntryBlock();
- DebugLoc EntryLoc = IsEntryBB && !IP->getDebugLoc().isUnknown()
- ? IP->getDebugLoc().getFnDebugLoc(*C)
+ DebugLoc EntryLoc = IsEntryBB && IP->getDebugLoc()
+ ? IP->getDebugLoc().getFnDebugLoc()
: IP->getDebugLoc();
IRBuilder<> IRB(IP);
IRB.SetCurrentDebugLocation(EntryLoc);
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index c3ba722..aa8ee5a 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -129,54 +129,48 @@ FunctionPass *llvm::createThreadSanitizerPass() {
return new ThreadSanitizer();
}
-static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
- if (Function *F = dyn_cast<Function>(FuncOrBitcast))
- return F;
- FuncOrBitcast->dump();
- report_fatal_error("ThreadSanitizer interface function redefined");
-}
-
void ThreadSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(M.getContext());
// Initialize the callbacks.
- TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
"__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
- TsanFuncExit = checkInterfaceFunction(M.getOrInsertFunction(
- "__tsan_func_exit", IRB.getVoidTy(), nullptr));
+ TsanFuncExit = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr));
OrdTy = IRB.getInt32Ty();
for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
const size_t ByteSize = 1 << i;
const size_t BitSize = ByteSize * 8;
SmallString<32> ReadName("__tsan_read" + itostr(ByteSize));
- TsanRead[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
SmallString<32> WriteName("__tsan_write" + itostr(ByteSize));
- TsanWrite[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
SmallString<64> UnalignedReadName("__tsan_unaligned_read" +
itostr(ByteSize));
- TsanUnalignedRead[i] = checkInterfaceFunction(M.getOrInsertFunction(
- UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ TsanUnalignedRead[i] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
SmallString<64> UnalignedWriteName("__tsan_unaligned_write" +
itostr(ByteSize));
- TsanUnalignedWrite[i] = checkInterfaceFunction(M.getOrInsertFunction(
- UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ TsanUnalignedWrite[i] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) +
"_load");
- TsanAtomicLoad[i] = checkInterfaceFunction(M.getOrInsertFunction(
- AtomicLoadName, Ty, PtrTy, OrdTy, nullptr));
+ TsanAtomicLoad[i] = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr));
SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) +
"_store");
- TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction(
- AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy,
- nullptr));
+ TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr));
for (int op = AtomicRMWInst::FIRST_BINOP;
op <= AtomicRMWInst::LAST_BINOP; ++op) {
@@ -199,34 +193,34 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
else
continue;
SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
- TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction(
- RMWName, Ty, PtrTy, Ty, OrdTy, nullptr));
+ TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr));
}
SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) +
"_compare_exchange_val");
- TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr));
}
- TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction(
- "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), nullptr));
- TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanVptrUpdate = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), nullptr));
+ TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
"__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
- TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
"__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, nullptr));
- TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
"__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, nullptr));
- MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction(
- "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy, nullptr));
- MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction(
- "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IntptrTy, nullptr));
- MemsetFn = checkInterfaceFunction(M.getOrInsertFunction(
- "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
- IntptrTy, nullptr));
+ MemmoveFn = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ MemcpyFn = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ MemsetFn = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt32Ty(), IntptrTy, nullptr));
}
bool ThreadSanitizer::doInitialization(Module &M) {
diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index 87de33b..d4fef10 100644
--- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -54,8 +54,6 @@ public:
RetainAutorelease(nullptr),
RetainAutoreleaseRV(nullptr) { }
- ~ARCRuntimeEntryPoints() { }
-
void init(Module *M) {
TheModule = M;
AutoreleaseRV = nullptr;
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index b197c97..4edd029 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -45,7 +45,7 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
default: break;
}
- ImmutableCallSite CS = static_cast<const Value *>(Inst);
+ ImmutableCallSite CS(Inst);
assert(CS && "Only calls can alter reference counts!");
// See if AliasAnalysis can help us with the call.
@@ -99,7 +99,7 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
// of any other dynamic reference-counted pointers.
if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA()))
return false;
- } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
+ } else if (auto CS = ImmutableCallSite(Inst)) {
// For calls, just check the arguments (and not the callee operand).
for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
OE = CS.arg_end(); OI != OE; ++OI) {
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 5aa2b97..8918909 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -50,9 +50,9 @@ struct AlignmentFromAssumptions : public FunctionPass {
initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<DominatorTreeWrapperPass>();
diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk
index cf30f39..16f2ead 100644
--- a/lib/Transforms/Scalar/Android.mk
+++ b/lib/Transforms/Scalar/Android.mk
@@ -11,6 +11,7 @@ transforms_scalar_SRC_FILES := \
DeadStoreElimination.cpp \
EarlyCSE.cpp \
FlattenCFGPass.cpp \
+ Float2Int.cpp \
GVN.cpp \
IndVarSimplify.cpp \
InductiveRangeCheckElimination.cpp \
@@ -30,6 +31,7 @@ transforms_scalar_SRC_FILES := \
LowerExpectIntrinsic.cpp \
MemCpyOptimizer.cpp \
MergedLoadStoreMotion.cpp \
+ NaryReassociate.cpp \
PartiallyInlineLibCalls.cpp \
PlaceSafepoints.cpp \
Reassociate.cpp \
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index d12fdb7..c29fcc3 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMScalarOpts
DeadStoreElimination.cpp
EarlyCSE.cpp
FlattenCFGPass.cpp
+ Float2Int.cpp
GVN.cpp
InductiveRangeCheckElimination.cpp
IndVarSimplify.cpp
@@ -28,6 +29,7 @@ add_llvm_library(LLVMScalarOpts
LowerExpectIntrinsic.cpp
MemCpyOptimizer.cpp
MergedLoadStoreMotion.cpp
+ NaryReassociate.cpp
PartiallyInlineLibCalls.cpp
PlaceSafepoints.cpp
Reassociate.cpp
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index cb8981b..01952cf 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -168,7 +168,7 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
return true;
}
}
- if (CallSite CS = I) {
+ if (auto CS = CallSite(I)) {
if (Function *F = CS.getCalledFunction()) {
if (TLI && TLI->has(LibFunc::strcpy) &&
F->getName() == TLI->getName(LibFunc::strcpy)) {
@@ -262,7 +262,7 @@ static bool isRemovable(Instruction *I) {
}
}
- if (CallSite CS = I)
+ if (auto CS = CallSite(I))
return CS.getInstruction()->use_empty();
return false;
@@ -306,7 +306,7 @@ static Value *getStoredPointerOperand(Instruction *I) {
}
}
- CallSite CS = I;
+ CallSite CS(I);
// All the supported functions so far happen to have dest as their first
// argument.
return CS.getArgument(0);
@@ -780,7 +780,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
- if (CallSite CS = cast<Value>(BBI)) {
+ if (auto CS = CallSite(BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
if (isAllocLikeFn(BBI, TLI))
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
new file mode 100644
index 0000000..c931422
--- /dev/null
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -0,0 +1,540 @@
+//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Float2Int pass, which aims to demote floating
+// point operations to work on integers, where that is losslessly possible.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "float2int"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <deque>
+#include <functional> // For std::function
+using namespace llvm;
+
+// The algorithm is simple. Start at instructions that convert from the
+// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use
+// graph, using an equivalence data structure to unify graphs that interfere.
+//
+// Mappable instructions are those with an integer corollary that, given
+// integer domain inputs, produce an integer output; fadd, for example.
+//
+// If a non-mappable instruction is seen, this entire def-use graph is marked
+// as non-transformable. If we see an instruction that converts from the
+// integer domain to FP domain (uitofp, sitofp), we terminate our walk.
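+//
+// As an illustrative sketch (not drawn from an actual test case), a def-use
+// graph such as
+//
+//   %x = sitofp i32 %a to double
+//   %y = fadd double %x, 1.0
+//   %z = fptosi double %y to i32
+//
+// contains only mappable instructions between its integer/FP boundaries, so
+// the fadd can be rewritten as an integer add once the computed ranges show
+// the arithmetic is lossless.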
+
+/// The largest integer type worth dealing with.
+static cl::opt<unsigned>
+MaxIntegerBW("float2int-max-integer-bw", cl::init(64), cl::Hidden,
+ cl::desc("Max integer bitwidth to consider in float2int "
+ "(default=64)"));
+
+namespace {
+ struct Float2Int : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ Float2Int() : FunctionPass(ID) {
+ initializeFloat2IntPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+
+ void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots);
+ ConstantRange seen(Instruction *I, ConstantRange R);
+ ConstantRange badRange();
+ ConstantRange unknownRange();
+ ConstantRange validateRange(ConstantRange R);
+ void walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots);
+ void walkForwards();
+ bool validateAndTransform();
+ Value *convert(Instruction *I, Type *ToTy);
+ void cleanup();
+
+ MapVector<Instruction*, ConstantRange > SeenInsts;
+ SmallPtrSet<Instruction*,8> Roots;
+ EquivalenceClasses<Instruction*> ECs;
+ MapVector<Instruction*, Value*> ConvertedInsts;
+ LLVMContext *Ctx;
+ };
+}
+
+char Float2Int::ID = 0;
+INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false)
+
+// Given a FCmp predicate, return a matching ICmp predicate if one
+// exists, otherwise return BAD_ICMP_PREDICATE.
+static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) {
+ switch (P) {
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ:
+ return CmpInst::ICMP_EQ;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT:
+ return CmpInst::ICMP_SGT;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE:
+ return CmpInst::ICMP_SGE;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT:
+ return CmpInst::ICMP_SLT;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE:
+ return CmpInst::ICMP_SLE;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE:
+ return CmpInst::ICMP_NE;
+ default:
+ return CmpInst::BAD_ICMP_PREDICATE;
+ }
+}
+
+// Given a floating point binary operator, return the matching
+// integer version.
+static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case Instruction::FAdd: return Instruction::Add;
+ case Instruction::FSub: return Instruction::Sub;
+ case Instruction::FMul: return Instruction::Mul;
+ }
+}
+
+// Find the roots - instructions that convert from the FP domain to
+// integer domain.
+void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
+ for (auto &I : inst_range(F)) {
+ switch (I.getOpcode()) {
+ default: break;
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ Roots.insert(&I);
+ break;
+ case Instruction::FCmp:
+ if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
+ CmpInst::BAD_ICMP_PREDICATE)
+ Roots.insert(&I);
+ break;
+ }
+ }
+}
+
+// Helper - mark I as having been traversed, having range R.
+ConstantRange Float2Int::seen(Instruction *I, ConstantRange R) {
+ DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n");
+ if (SeenInsts.find(I) != SeenInsts.end())
+ SeenInsts.find(I)->second = R;
+ else
+ SeenInsts.insert(std::make_pair(I, R));
+ return R;
+}
+
+// Helper - get a range representing a poison value.
+ConstantRange Float2Int::badRange() {
+ return ConstantRange(MaxIntegerBW + 1, true);
+}
+ConstantRange Float2Int::unknownRange() {
+ return ConstantRange(MaxIntegerBW + 1, false);
+}
+ConstantRange Float2Int::validateRange(ConstantRange R) {
+ if (R.getBitWidth() > MaxIntegerBW + 1)
+ return badRange();
+ return R;
+}
+
+// The most obvious way to structure the search is a depth-first, eager
+// search from each root. However, that requires direct recursion and so
+// can only handle small instruction sequences. Instead, we split the search
+// up into two phases:
+// - walkBackwards: A breadth-first walk of the use-def graph starting from
+// the roots. Populate "SeenInsts" with interesting
+// instructions and poison values if they're obvious and
+// cheap to compute. Calculate the equivalence set structure
+// while we're here too.
+// - walkForwards: Iterate over SeenInsts in reverse order, so we visit
+// defs before their uses. Calculate the real range info.
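+//
+// For instance (a hypothetical visit order), if walkBackwards climbs from a
+// root through fptosi, then fadd, then sitofp, SeenInsts records them in
+// that order, and the reverse iteration in walkForwards processes sitofp,
+// fadd, fptosi - so each operand's range is known before any instruction
+// that uses it is evaluated.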
+
+// Breadth-first walk of the use-def graph; determine the set of nodes
+// we care about and eagerly determine if some of them are poisonous.
+void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
+ std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+
+ if (SeenInsts.find(I) != SeenInsts.end())
+ // Seen already.
+ continue;
+
+ switch (I->getOpcode()) {
+ // FIXME: Handle select and phi nodes.
+ default:
+ // Path terminated uncleanly.
+ seen(I, badRange());
+ break;
+
+ case Instruction::UIToFP: {
+ // Path terminated cleanly.
+ unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt Min = APInt::getMinValue(BW).zextOrSelf(MaxIntegerBW+1);
+ APInt Max = APInt::getMaxValue(BW).zextOrSelf(MaxIntegerBW+1);
+ seen(I, validateRange(ConstantRange(Min, Max)));
+ continue;
+ }
+
+ case Instruction::SIToFP: {
+ // Path terminated cleanly.
+ unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(MaxIntegerBW+1);
+ APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(MaxIntegerBW+1);
+ seen(I, validateRange(ConstantRange(SMin, SMax)));
+ continue;
+ }
+
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FCmp:
+ seen(I, unknownRange());
+ break;
+ }
+
+ for (Value *O : I->operands()) {
+ if (Instruction *OI = dyn_cast<Instruction>(O)) {
+ // Unify def-use chains if they interfere.
+ ECs.unionSets(I, OI);
+ if (SeenInsts.find(I)->second != badRange())
+ Worklist.push_back(OI);
+ } else if (!isa<ConstantFP>(O)) {
+ // Not an instruction or ConstantFP? we can't do anything.
+ seen(I, badRange());
+ }
+ }
+ }
+}
+
+// Walk forwards down the list of seen instructions, so we visit defs before
+// uses.
+void Float2Int::walkForwards() {
+ for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) {
+ if (It->second != unknownRange())
+ continue;
+
+ Instruction *I = It->first;
+ std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
+ switch (I->getOpcode()) {
+ // FIXME: Handle select and phi nodes.
+ default:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ llvm_unreachable("Should have been handled in walkForwards!");
+
+ case Instruction::FAdd:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FAdd is a binary operator!");
+ return Ops[0].add(Ops[1]);
+ };
+ break;
+
+ case Instruction::FSub:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FSub is a binary operator!");
+ return Ops[0].sub(Ops[1]);
+ };
+ break;
+
+ case Instruction::FMul:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FMul is a binary operator!");
+ return Ops[0].multiply(Ops[1]);
+ };
+ break;
+
+ //
+ // Root-only instructions - we'll only see these if they're the
+ // first node in a walk.
+ //
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!");
+ return Ops[0];
+ };
+ break;
+
+ case Instruction::FCmp:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FCmp is a binary operator!");
+ return Ops[0].unionWith(Ops[1]);
+ };
+ break;
+ }
+
+ bool Abort = false;
+ SmallVector<ConstantRange,4> OpRanges;
+ for (Value *O : I->operands()) {
+ if (Instruction *OI = dyn_cast<Instruction>(O)) {
+ assert(SeenInsts.find(OI) != SeenInsts.end() &&
+ "def not seen before use!");
+ OpRanges.push_back(SeenInsts.find(OI)->second);
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
+ // Work out if the floating point number can be losslessly represented
+ // as an integer.
+ // APFloat::convertToInteger(&Exact) purports to do what we want, but
+ // the exactness can be too precise. For example, negative zero can
+ // never be exactly converted to an integer.
+ //
+ // Instead, we ask APFloat to round itself to an integral value - this
+ // preserves sign-of-zero - then compare the result with the original.
+ //
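+ // Purely as an illustration (this adds no extra check): 3.0 rounds to
+ // 3.0 and compares equal, so it is accepted, whereas 2.5 rounds to 2.0
+ // and compares unequal, so the instruction is marked badRange().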
+ APFloat F = CF->getValueAPF();
+
+ // First, weed out obviously incorrect values. Non-finite numbers
+ // can't be represented and neither can negative zero, unless
+ // we're in fast math mode.
+ if (!F.isFinite() ||
+ (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
+ !I->hasNoSignedZeros())) {
+ seen(I, badRange());
+ Abort = true;
+ break;
+ }
+
+ APFloat NewF = F;
+ auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven);
+ if (Res != APFloat::opOK || NewF.compare(F) != APFloat::cmpEqual) {
+ seen(I, badRange());
+ Abort = true;
+ break;
+ }
+ // OK, it's representable. Now get it.
+ APSInt Int(MaxIntegerBW+1, false);
+ bool Exact;
+ CF->getValueAPF().convertToInteger(Int,
+ APFloat::rmNearestTiesToEven,
+ &Exact);
+ OpRanges.push_back(ConstantRange(Int));
+ } else {
+ llvm_unreachable("Should have already marked this as badRange!");
+ }
+ }
+
+ // Reduce the operands' ranges to a single range and return.
+ if (!Abort)
+ seen(I, Op(OpRanges));
+ }
+}
+
+// If there is a valid transform to be done, do it.
+bool Float2Int::validateAndTransform() {
+ bool MadeChange = false;
+
+ // Iterate over every disjoint partition of the def-use graph.
+ for (auto It = ECs.begin(), E = ECs.end(); It != E; ++It) {
+ ConstantRange R(MaxIntegerBW + 1, false);
+ bool Fail = false;
+ Type *ConvertedToTy = nullptr;
+
+ // For every member of the partition, union all the ranges together.
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
+ MI != ME; ++MI) {
+ Instruction *I = *MI;
+ auto SeenI = SeenInsts.find(I);
+ if (SeenI == SeenInsts.end())
+ continue;
+
+ R = R.unionWith(SeenI->second);
+ // We need to ensure I has no users that have not been seen.
+ // If it does, transformation would be illegal.
+ //
+ // Don't count the roots, as they terminate the graphs.
+ if (Roots.count(I) == 0) {
+ // Set the type of the conversion while we're here.
+ if (!ConvertedToTy)
+ ConvertedToTy = I->getType();
+ for (User *U : I->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI || SeenInsts.find(UI) == SeenInsts.end()) {
+ DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n");
+ Fail = true;
+ break;
+ }
+ }
+ }
+ if (Fail)
+ break;
+ }
+
+ // If the set was empty, or we failed, or the range is poisonous,
+ // bail out.
+ if (ECs.member_begin(It) == ECs.member_end() || Fail ||
+ R.isFullSet() || R.isSignWrappedSet())
+ continue;
+ assert(ConvertedToTy && "Must have set the convertedtoty by this point!");
+
+ // The number of bits required is the maximum of the upper and
+ // lower limits, plus one so it can be signed.
+ unsigned MinBW = std::max(R.getLower().getMinSignedBits(),
+ R.getUpper().getMinSignedBits()) + 1;
+ DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n");
+
+ // If we've run off the realms of the exactly representable integers,
+ // the floating point result will differ from an integer approximation.
+
+ // Do we need more bits than are in the mantissa of the type we converted
+ // to? semanticsPrecision returns the number of mantissa bits plus one
+ // for the sign bit.
+ unsigned MaxRepresentableBits
+ = APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - 1;
+ if (MinBW > MaxRepresentableBits) {
+ DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n");
+ continue;
+ }
+ if (MinBW > 64) {
+ DEBUG(dbgs() << "F2I: Value requires more than 64 bits to represent!\n");
+ continue;
+ }
+
+ // OK, R is known to be representable. Now pick a type for it.
+ // FIXME: Pick the smallest legal type that will fit.
+ Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx);
+
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
+ MI != ME; ++MI)
+ convert(*MI, Ty);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+Value *Float2Int::convert(Instruction *I, Type *ToTy) {
+ if (ConvertedInsts.find(I) != ConvertedInsts.end())
+ // Already converted this instruction.
+ return ConvertedInsts[I];
+
+ SmallVector<Value*,4> NewOperands;
+ for (Value *V : I->operands()) {
+ // Don't recurse if we're an instruction that terminates the path.
+ if (I->getOpcode() == Instruction::UIToFP ||
+ I->getOpcode() == Instruction::SIToFP) {
+ NewOperands.push_back(V);
+ } else if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ NewOperands.push_back(convert(VI, ToTy));
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false);
+ bool Exact;
+ CF->getValueAPF().convertToInteger(Val,
+ APFloat::rmNearestTiesToEven,
+ &Exact);
+ NewOperands.push_back(ConstantInt::get(ToTy, Val));
+ } else {
+ llvm_unreachable("Unhandled operand type?");
+ }
+ }
+
+ // Now create a new instruction.
+ IRBuilder<> IRB(I);
+ Value *NewV = nullptr;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unhandled instruction!");
+
+ case Instruction::FPToUI:
+ NewV = IRB.CreateZExtOrTrunc(NewOperands[0], I->getType());
+ break;
+
+ case Instruction::FPToSI:
+ NewV = IRB.CreateSExtOrTrunc(NewOperands[0], I->getType());
+ break;
+
+ case Instruction::FCmp: {
+ CmpInst::Predicate P = mapFCmpPred(cast<CmpInst>(I)->getPredicate());
+ assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!");
+ NewV = IRB.CreateICmp(P, NewOperands[0], NewOperands[1], I->getName());
+ break;
+ }
+
+ case Instruction::UIToFP:
+ NewV = IRB.CreateZExtOrTrunc(NewOperands[0], ToTy);
+ break;
+
+ case Instruction::SIToFP:
+ NewV = IRB.CreateSExtOrTrunc(NewOperands[0], ToTy);
+ break;
+
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ NewV = IRB.CreateBinOp(mapBinOpcode(I->getOpcode()),
+ NewOperands[0], NewOperands[1],
+ I->getName());
+ break;
+ }
+
+ // If we're a root instruction, RAUW.
+ if (Roots.count(I))
+ I->replaceAllUsesWith(NewV);
+
+ ConvertedInsts[I] = NewV;
+ return NewV;
+}
+
+// Perform dead code elimination on the instructions we just modified.
+void Float2Int::cleanup() {
+ for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend();
+ I != E; ++I)
+ I->first->eraseFromParent();
+}
+
+bool Float2Int::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");
+ // Clear out all state.
+ ECs = EquivalenceClasses<Instruction*>();
+ SeenInsts.clear();
+ ConvertedInsts.clear();
+ Roots.clear();
+
+ Ctx = &F.getParent()->getContext();
+
+ findRoots(F, Roots);
+
+ walkBackwards(Roots);
+ walkForwards();
+
+ bool Modified = validateAndTransform();
+ if (Modified)
+ cleanup();
+ return Modified;
+}
+
+FunctionPass *llvm::createFloat2IntPass() {
+ return new Float2Int();
+}
+
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c73e60f..97d5b6d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1102,7 +1102,8 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
if (ConstantFoldLoadFromConstPtr(Src, DL))
return Offset;
@@ -1263,7 +1264,8 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
return ConstantFoldLoadFromConstPtr(Src, DL);
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 51e8041..ab8e5b8 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1274,55 +1274,6 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//
-/// Check for expressions that ScalarEvolution generates to compute
-/// BackedgeTakenInfo. If these expressions have not been reduced, then
-/// expanding them may incur additional cost (albeit in the loop preheader).
-static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
- SmallPtrSetImpl<const SCEV*> &Processed,
- ScalarEvolution *SE) {
- if (!Processed.insert(S).second)
- return false;
-
- // If the backedge-taken count is a UDiv, it's very likely a UDiv that
- // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
- // precise expression, rather than a UDiv from the user's code. If we can't
- // find a UDiv in the code with some simple searching, assume the former and
- // forego rewriting the loop.
- if (isa<SCEVUDivExpr>(S)) {
- ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!OrigCond) return true;
- const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
- R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
- if (R != S) {
- const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
- L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
- if (L != S)
- return true;
- }
- }
-
- // Recurse past add expressions, which commonly occur in the
- // BackedgeTakenCount. They may already exist in program code, and if not,
- // they are not too expensive rematerialize.
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I) {
- if (isHighCostExpansion(*I, BI, Processed, SE))
- return true;
- }
- return false;
- }
-
- // HowManyLessThans uses a Max expression whenever the loop is not guarded by
- // the exit condition.
- if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
- return true;
-
- // If we haven't recognized an expensive SCEV pattern, assume it's an
- // expression produced by program code.
- return false;
-}
-
/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
/// sequence that can be used by LinearFunctionTestReplace.
@@ -1336,7 +1287,8 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
/// However, we don't yet have a strong motivation for converting loop tests
/// into inequality tests.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
+ SCEVExpander &Rewriter) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
BackedgeTakenCount->isZero())
@@ -1346,12 +1298,10 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
return false;
// Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
+ if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
return false;
- SmallPtrSet<const SCEV*, 8> Processed;
- if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
+ if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
return false;
return true;
@@ -1637,7 +1587,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
&& "unit stride pointer IV must be i8*");
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
- return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit");
+ return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
}
else {
// In any other case, convert both IVInit and IVCount to integers before
@@ -1691,7 +1641,7 @@ LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter) {
- assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+ assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
// Initialize CmpIndVar and IVCount to their preincremented values.
Value *CmpIndVar = IndVar;
@@ -1936,7 +1886,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
- if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
+ if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {
PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
if (IndVar) {
// Check preconditions for proper SCEVExpander operation. SCEV does not
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 1f33f72..c19cd19 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -41,9 +41,9 @@ struct PointerOffsetPair {
};
struct LoadPOPPair {
+ LoadPOPPair() = default;
LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O)
: Load(L), POP(P), InsertOrder(O) {}
- LoadPOPPair() {}
LoadInst *Load;
PointerOffsetPair POP;
/// \brief The new load needs to be created before the first load in IR order.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 8445d5f..099f227 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -28,7 +28,7 @@
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
// it's useful to think about these as the same register, with some uses using
-// the value of the register before the add and some using // it after. In this
+// the value of the register before the add and some using it after. In this
// example, the icmp is a post-increment user, since it uses %i.next, which is
// the value of the induction variable after the increment. The other common
// case of post-increment users is users outside the loop.
@@ -112,8 +112,6 @@ public:
/// a particular register.
SmallBitVector UsedByIndices;
- RegSortData() {}
-
void print(raw_ostream &OS) const;
void dump() const;
};
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 600cbde..3de3b3d 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -165,6 +165,7 @@ namespace {
UP.MaxCount = UINT_MAX;
UP.Partial = CurrentAllowPartial;
UP.Runtime = CurrentRuntime;
+ UP.AllowExpensiveTripCount = false;
TTI.getUnrollingPreferences(L, UP);
}
@@ -886,8 +887,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this,
- &LPM, &AC))
+ if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
+ TripMultiple, LI, this, &LPM, &AC))
return false;
return true;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2b5a078..d651473 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1045,7 +1045,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
RepeatInstruction = processMemCpy(M);
else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
RepeatInstruction = processMemMove(M);
- else if (CallSite CS = (Value*)I) {
+ else if (auto CS = CallSite(I)) {
for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
if (CS.isByValArgument(i))
MadeChange |= processByValArgument(CS, i);
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
new file mode 100644
index 0000000..fea7641
--- /dev/null
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -0,0 +1,252 @@
+//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass reassociates n-ary add expressions and eliminates the redundancy
+// exposed by the reassociation.
+//
+// A motivating example:
+//
+// void foo(int a, int b) {
+// bar(a + b);
+// bar((a + 2) + b);
+// }
+//
+// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
+// the above code to
+//
+// int t = a + b;
+// bar(t);
+// bar(t + 2);
+//
+// However, the Reassociate pass is unable to do that because it processes each
+// instruction individually and believes (a + 2) + b is the best form according
+// to its rank system.
+//
+// To address this limitation, NaryReassociate reassociates an expression in a
+// form that reuses existing instructions. As a result, NaryReassociate can
+// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that
+// (a + b) is computed before.
+//
+// NaryReassociate works as follows. For every instruction in the form of (a +
+// b) + c, it checks whether a + c or b + c is already computed by a dominating
+// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +
+// c) + a and removes the redundancy accordingly. To efficiently look up whether
+// an expression is computed before, we store each instruction seen and its SCEV
+// into an SCEV-to-instruction map.
+//
+// Although the algorithm pattern-matches only ternary additions, it
+// automatically handles many >3-ary expressions by walking through the function
+// in the depth-first order. For example, given
+//
+// (a + c) + d
+// ((a + b) + c) + d
+//
+// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
+// ((a + c) + b) + d into ((a + c) + d) + b.
+//
+// Finally, the above dominator-based algorithm may need to be run multiple
+// iterations before emitting optimal code. One source of this need is that we
+// only split an operand when it is used only once. The above algorithm can
+// eliminate an instruction and decrease the usage count of its operands. As a
+// result, an instruction that previously had multiple uses may become a
+// single-use instruction and thus eligible for split consideration. For
+// example,
+//
+// ac = a + c
+// ab = a + b
+// abc = ab + c
+// ab2 = ab + b
+// ab2c = ab2 + c
+//
+// In the first iteration, we cannot reassociate abc to ac+b because ab is used
+// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a
+// result, ab2 becomes dead and ab will be used only once in the second
+// iteration.
+//
+// Limitations and TODO items:
+//
+// 1) We only consider n-ary adds for now. This should be extended and
+// generalized.
+//
+// 2) Besides arithmetic operations, similar reassociation can be applied to
+// GEPs. For example, if
+// X = &arr[a]
+// dominates
+// Y = &arr[a + b]
+// we may rewrite Y into X + b.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "nary-reassociate"
+
+namespace {
+class NaryReassociate : public FunctionPass {
+public:
+ static char ID;
+
+ NaryReassociate(): FunctionPass(ID) {
+ initializeNaryReassociatePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+private:
+ // Runs only one iteration of the dominator-based algorithm. See the header
+ // comments for why we need multiple iterations.
+ bool doOneIteration(Function &F);
+ // Reassociates I to a better form.
+ Instruction *tryReassociateAdd(Instruction *I);
+ // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed.
+ Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I);
+ // Rewrites I to LHS + RHS if LHS is computed already.
+ Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I);
+
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ // A lookup table quickly telling which instructions compute the given SCEV.
+ // Note that there can be multiple instructions at different locations
+ // computing to the same SCEV, so we map a SCEV to an instruction list. For
+ // example,
+ //
+ // if (p1)
+ // foo(a + b);
+ // if (p2)
+ // bar(a + b);
+ DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs;
+};
+} // anonymous namespace
+
+char NaryReassociate::ID = 0;
+INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
+ false, false)
+
+FunctionPass *llvm::createNaryReassociatePass() {
+ return new NaryReassociate();
+}
+
+bool NaryReassociate::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = &getAnalysis<ScalarEvolution>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+ bool Changed = false, ChangedInThisIteration;
+ do {
+ ChangedInThisIteration = doOneIteration(F);
+ Changed |= ChangedInThisIteration;
+ } while (ChangedInThisIteration);
+ return Changed;
+}
+
+bool NaryReassociate::doOneIteration(Function &F) {
+ bool Changed = false;
+ SeenExprs.clear();
+ // Traverse the dominator tree in the depth-first order. This order makes sure
+ // all bases of a candidate are in Candidates when we process it.
+ for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
+ Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
+ BasicBlock *BB = Node->getBlock();
+ for (auto I = BB->begin(); I != BB->end(); ++I) {
+ if (I->getOpcode() == Instruction::Add) {
+ if (Instruction *NewI = tryReassociateAdd(I)) {
+ Changed = true;
+ SE->forgetValue(I);
+ I->replaceAllUsesWith(NewI);
+ RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ I = NewI;
+ }
+ // We should add the rewritten instruction because tryReassociateAdd may
+ // have invalidated the original one.
+ SeenExprs[SE->getSCEV(I)].push_back(I);
+ }
+ }
+ }
+ return Changed;
+}
+
+Instruction *NaryReassociate::tryReassociateAdd(Instruction *I) {
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ if (auto *NewI = tryReassociateAdd(LHS, RHS, I))
+ return NewI;
+ if (auto *NewI = tryReassociateAdd(RHS, LHS, I))
+ return NewI;
+ return nullptr;
+}
+
+Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS,
+ Instruction *I) {
+ Value *A = nullptr, *B = nullptr;
+ // To be conservative, we reassociate I only when it is the only user of A+B.
+ if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) {
+ // I = (A + B) + RHS
+ // = (A + RHS) + B or (B + RHS) + A
+ const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
+ const SCEV *RHSExpr = SE->getSCEV(RHS);
+ if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I))
+ return NewI;
+ if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I))
+ return NewI;
+ }
+ return nullptr;
+}
+
+Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
+ Value *RHS, Instruction *I) {
+ auto Pos = SeenExprs.find(LHSExpr);
+ // Bail out if LHSExpr is not previously seen.
+ if (Pos == SeenExprs.end())
+ return nullptr;
+
+ auto &LHSCandidates = Pos->second;
+ // Look for the closest dominator LHS of I that computes LHSExpr, and replace
+ // I with LHS + RHS.
+ //
+ // Because we traverse the dominator tree in the pre-order, a
+ // candidate that doesn't dominate the current instruction won't dominate any
+ // future instruction either. Therefore, we pop it out of the stack. This
+ // optimization makes the algorithm O(n).
+ while (!LHSCandidates.empty()) {
+ Instruction *LHS = LHSCandidates.back();
+ if (DT->dominates(LHS, I)) {
+ Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+ NewI->takeName(I);
+ return NewI;
+ }
+ LHSCandidates.pop_back();
+ }
+ return nullptr;
+}
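
(Illustrative aside, not part of the patch.) The new pass reuses a dominating computation of a sub-expression when it re-associates an add. A minimal stand-alone C++ sketch of the idea, with plain integers standing in for IR values and all names made up:

// Toy stand-in for the NaryReassociate rewrite: a dominating "a + b"
// already exists, so (a + c) + b can be re-expressed to reuse it.
#include <cassert>

int main() {
  int a = 3, b = 5, c = 7;
  int t1 = a + b;           // seen earlier; dominates the add below
  int orig = (a + c) + b;   // original association
  int reassoc = t1 + c;     // rewritten form: (a + b) + c, reusing t1
  assert(orig == reassoc);  // the rewrite preserves the value
  return 0;
}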
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 944725a..536f2a6 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -217,7 +217,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
BasicBlock *Current = Pred;
while (true) {
for (Instruction &I : *Current) {
- if (CallSite CS = &I)
+ if (auto CS = CallSite(&I))
// Note: Technically, needing a safepoint isn't quite the right
// condition here. We should instead be checking if the target method
// has an
@@ -424,8 +424,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
// We need to stop going forward as soon as we see a call that can
// grow the stack (i.e. the call target has a non-zero frame
// size).
- if (CallSite CS = cursor) {
- (void)CS; // Silence an unused variable warning by gcc 4.8.2
+ if (CallSite(cursor)) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(cursor)) {
// llvm.assume(...) are not really calls.
if (II->getIntrinsicID() == Intrinsic::assume) {
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index f5d21ff..ba48e0a 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
@@ -49,11 +50,20 @@ static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden,
// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
cl::init(false));
-static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size",
- cl::Hidden, cl::init(false));
+static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
+ cl::init(false));
// Print out the base pointers for debugging
-static cl::opt<bool> PrintBasePointers("spp-print-base-pointers",
- cl::Hidden, cl::init(false));
+static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
+ cl::init(false));
+
+#ifdef XDEBUG
+static bool ClobberNonLive = true;
+#else
+static bool ClobberNonLive = false;
+#endif
+static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
+ cl::location(ClobberNonLive),
+ cl::Hidden);
namespace {
struct RewriteStatepointsForGC : public FunctionPass {
@@ -85,6 +95,22 @@ INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
"Make relocations explicit at statepoints", false, false)
namespace {
+struct GCPtrLivenessData {
+ /// Values defined in this block.
+ DenseMap<BasicBlock *, DenseSet<Value *>> KillSet;
+ /// Values used in this block (and thus live); does not include values
+ /// killed within this block.
+ DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet;
+
+ /// Values live into this basic block (i.e. used by any
+ /// instruction in this basic block or ones reachable from here)
+ DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn;
+
+ /// Values live out of this basic block (i.e. live into
+ /// any successor block)
+ DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut;
+};
+
// The type of the internal cache used inside the findBasePointers family
// of functions. From the callers perspective, this is an opaque type and
// should not be inspected.
@@ -105,10 +131,6 @@ struct PartiallyConstructedSafepointRecord {
/// Mapping from live pointers to a base-defining-value
DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
- /// Any new values which were added to the IR during base pointer analysis
- /// for this safepoint
- DenseSet<llvm::Value *> NewInsertedDefs;
-
/// The *new* gc.statepoint instruction itself. This produces the token
/// that normal path gc.relocates and the gc.result are tied to.
Instruction *StatepointToken;
@@ -119,6 +141,15 @@ struct PartiallyConstructedSafepointRecord {
};
}
+/// Compute the live-in set for every basic block in the function
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+ GCPtrLivenessData &Data);
+
+/// Given results from the dataflow liveness computation, find the set of live
+/// Values at a particular instruction.
+static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
+ StatepointLiveSetTy &out);
+
// TODO: Once we can get to the GCStrategy, this becomes
// Optional<bool> isGCManagedPointer(const Value *V) const override {
@@ -131,129 +162,46 @@ static bool isGCPointerType(const Type *T) {
return false;
}
-/// Return true if the Value is a gc reference type which is potentially used
-/// after the instruction 'loc'. This is only used with the edge reachability
-/// liveness code. Note: It is assumed the V dominates loc.
-static bool isLiveGCReferenceAt(Value &V, Instruction *loc, DominatorTree &DT,
- LoopInfo *LI) {
- if (!isGCPointerType(V.getType()))
- return false;
-
- if (V.use_empty())
- return false;
-
- // Given assumption that V dominates loc, this may be live
- return true;
+// Return true if this type is one which a) is a gc pointer or contains a GC
+// pointer and b) is of a type this code expects to encounter as a live value.
+// (The insertion code will assert that a type which matches (a) and not (b)
+// is not encountered.)
+static bool isHandledGCPointerType(Type *T) {
+ // We fully support gc pointers
+ if (isGCPointerType(T))
+ return true;
+ // We partially support vectors of gc pointers. The code will assert if it
+ // can't handle something.
+ if (auto VT = dyn_cast<VectorType>(T))
+ if (isGCPointerType(VT->getElementType()))
+ return true;
+ return false;
}
#ifndef NDEBUG
-static bool isAggWhichContainsGCPtrType(Type *Ty) {
+/// Returns true if this type contains a gc pointer whether we know how to
+/// handle that type or not.
+static bool containsGCPtrType(Type *Ty) {
+ if (isGCPointerType(Ty))
+ return true;
if (VectorType *VT = dyn_cast<VectorType>(Ty))
return isGCPointerType(VT->getScalarType());
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
- return isGCPointerType(AT->getElementType()) ||
- isAggWhichContainsGCPtrType(AT->getElementType());
+ return containsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
- return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
- [](Type *SubType) {
- return isGCPointerType(SubType) ||
- isAggWhichContainsGCPtrType(SubType);
- });
+ return std::any_of(
+ ST->subtypes().begin(), ST->subtypes().end(),
+ [](Type *SubType) { return containsGCPtrType(SubType); });
return false;
}
-#endif
-
-// Conservatively identifies any definitions which might be live at the
-// given instruction. The analysis is performed immediately before the
-// given instruction. Values defined by that instruction are not considered
-// live. Values used by that instruction are considered live.
-//
-// preconditions: valid IR graph, term is either a terminator instruction or
-// a call instruction, pred is the basic block of term, DT, LI are valid
-//
-// side effects: none, does not mutate IR
-//
-// postconditions: populates liveValues as discussed above
-static void findLiveGCValuesAtInst(Instruction *term, BasicBlock *pred,
- DominatorTree &DT, LoopInfo *LI,
- StatepointLiveSetTy &liveValues) {
- liveValues.clear();
-
- assert(isa<CallInst>(term) || isa<InvokeInst>(term) || term->isTerminator());
- Function *F = pred->getParent();
-
- auto is_live_gc_reference =
- [&](Value &V) { return isLiveGCReferenceAt(V, term, DT, LI); };
-
- // Are there any gc pointer arguments live over this point? This needs to be
- // special cased since arguments aren't defined in basic blocks.
- for (Argument &arg : F->args()) {
- assert(!isAggWhichContainsGCPtrType(arg.getType()) &&
- "support for FCA unimplemented");
-
- if (is_live_gc_reference(arg)) {
- liveValues.insert(&arg);
- }
- }
-
- // Walk through all dominating blocks - the ones which can contain
- // definitions used in this block - and check to see if any of the values
- // they define are used in locations potentially reachable from the
- // interesting instruction.
- BasicBlock *BBI = pred;
- while (true) {
- if (TraceLSP) {
- errs() << "[LSP] Looking at dominating block " << pred->getName() << "\n";
- }
- assert(DT.dominates(BBI, pred));
- assert(isPotentiallyReachable(BBI, pred, &DT) &&
- "dominated block must be reachable");
-
- // Walk through the instructions in dominating blocks and keep any
- // that have a use potentially reachable from the block we're
- // considering putting the safepoint in
- for (Instruction &inst : *BBI) {
- if (TraceLSP) {
- errs() << "[LSP] Looking at instruction ";
- inst.dump();
- }
-
- if (pred == BBI && (&inst) == term) {
- if (TraceLSP) {
- errs() << "[LSP] stopped because we encountered the safepoint "
- "instruction.\n";
- }
-
- // If we're in the block which defines the interesting instruction,
- // we don't want to include any values as live which are defined
- // _after_ the interesting line or as part of the line itself
- // i.e. "term" is the call instruction for a call safepoint, the
- // results of the call should not be considered live in that stackmap
- break;
- }
-
- assert(!isAggWhichContainsGCPtrType(inst.getType()) &&
- "support for FCA unimplemented");
-
- if (is_live_gc_reference(inst)) {
- if (TraceLSP) {
- errs() << "[LSP] found live value for this safepoint ";
- inst.dump();
- term->dump();
- }
- liveValues.insert(&inst);
- }
- }
- if (!DT.getNode(BBI)->getIDom()) {
- assert(BBI == &F->getEntryBlock() &&
- "failed to find a dominator for something other than "
- "the entry block");
- break;
- }
- BBI = DT.getNode(BBI)->getIDom()->getBlock();
- }
+// Returns true if this is a type which a) is a gc pointer or contains a GC
+// pointer and b) is of a type which the code doesn't expect (i.e. first class
+// aggregates). Used to trip assertions.
+static bool isUnhandledGCPointerType(Type *Ty) {
+ return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
}
+#endif
static bool order_by_name(llvm::Value *a, llvm::Value *b) {
if (a->hasName() && b->hasName()) {
@@ -268,16 +216,17 @@ static bool order_by_name(llvm::Value *a, llvm::Value *b) {
}
}
-/// Find the initial live set. Note that due to base pointer
-/// insertion, the live set may be incomplete.
-static void
-analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS,
- PartiallyConstructedSafepointRecord &result) {
+// Conservatively identifies any definitions which might be live at the
+// given instruction. The analysis is performed immediately before the
+// given instruction. Values defined by that instruction are not considered
+// live. Values used by that instruction are considered live.
+static void analyzeParsePointLiveness(
+ DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData,
+ const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
Instruction *inst = CS.getInstruction();
- BasicBlock *BB = inst->getParent();
StatepointLiveSetTy liveset;
- findLiveGCValuesAtInst(inst, BB, DT, nullptr, liveset);
+ findLiveSetAtInst(inst, OriginalLivenessData, liveset);
if (PrintLiveSet) {
// Note: This output is used by several of the test cases
@@ -299,10 +248,49 @@ analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS,
result.liveset = liveset;
}
-/// True iff this value is the null pointer constant (of any pointer type)
-static bool LLVM_ATTRIBUTE_UNUSED isNullConstant(Value *V) {
- return isa<Constant>(V) && isa<PointerType>(V->getType()) &&
- cast<Constant>(V)->isNullValue();
+/// If we can trivially determine that this vector contains only base pointers,
+/// return the base instruction.
+static Value *findBaseOfVector(Value *I) {
+ assert(I->getType()->isVectorTy() &&
+ cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
+ "Illegal to ask for the base pointer of a non-pointer type");
+
+ // Each case parallels findBaseDefiningValue below, see that code for
+ // detailed motivation.
+
+ if (isa<Argument>(I))
+ // An incoming argument to the function is a base pointer
+ return I;
+
+ // We shouldn't see the address of a global as a vector value?
+ assert(!isa<GlobalVariable>(I) &&
+ "unexpected global variable found in base of vector");
+
+ // Inlining could possibly introduce a phi node that contains
+ // undef if the callee has multiple returns
+ if (isa<UndefValue>(I))
+ // utterly meaningless, but useful for dealing with partially optimized
+ // code.
+ return I;
+
+ // Due to inheritance, this must be _after_ the global variable and undef
+ // checks
+ if (Constant *Con = dyn_cast<Constant>(I)) {
+ assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
+ "order of checks wrong!");
+ assert(Con->isNullValue() && "null is the only case which makes sense");
+ return Con;
+ }
+
+ if (isa<LoadInst>(I))
+ return I;
+
+ // Note: This code is currently rather incomplete. We are essentially only
+ // handling cases where the vector element is trivially a base pointer. We
+ // need to update the entire base pointer construction algorithm to know how
+ // to track vector elements and potentially scalarize, but the case which
+ // would motivate the work hasn't shown up in real workloads yet.
+ llvm_unreachable("no base found for vector element");
}
/// Helper function for findBasePointer - Will return a value which either a)
@@ -312,52 +300,36 @@ static Value *findBaseDefiningValue(Value *I) {
assert(I->getType()->isPointerTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
- // There are instructions which can never return gc pointer values. Sanity
- // check
- // that this is actually true.
- assert(!isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
- !isa<ShuffleVectorInst>(I) && "Vector types are not gc pointers");
- assert((!isa<Instruction>(I) || isa<InvokeInst>(I) ||
- !cast<Instruction>(I)->isTerminator()) &&
- "With the exception of invoke terminators don't define values");
- assert(!isa<StoreInst>(I) && !isa<FenceInst>(I) &&
- "Can't be definitions to start with");
- assert(!isa<ICmpInst>(I) && !isa<FCmpInst>(I) &&
- "Comparisons don't give ops");
- // There's a bunch of instructions which just don't make sense to apply to
- // a pointer. The only valid reason for this would be pointer bit
- // twiddling which we're just not going to support.
- assert((!isa<Instruction>(I) || !cast<Instruction>(I)->isBinaryOp()) &&
- "Binary ops on pointer values are meaningless. Unless your "
- "bit-twiddling which we don't support");
-
- if (Argument *Arg = dyn_cast<Argument>(I)) {
+ // This case is a bit of a hack - it only handles extracts from vectors which
+ // trivially contain only base pointers. See note inside the function for
+ // how to improve this.
+ if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
+ Value *VectorOperand = EEI->getVectorOperand();
+ Value *VectorBase = findBaseOfVector(VectorOperand);
+ (void)VectorBase;
+ assert(VectorBase && "extract element not known to be a trivial base");
+ return EEI;
+ }
+
+ if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
// We should have never reached here if this argument isn't an gc value
- assert(Arg->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- return Arg;
- }
+ return I;
- if (GlobalVariable *global = dyn_cast<GlobalVariable>(I)) {
+ if (isa<GlobalVariable>(I))
// base case
- assert(global->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- return global;
- }
+ return I;
// inlining could possibly introduce phi node that contains
// undef if callee has multiple returns
- if (UndefValue *undef = dyn_cast<UndefValue>(I)) {
- assert(undef->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- return undef; // utterly meaningless, but useful for dealing with
- // partially optimized code.
- }
+ if (isa<UndefValue>(I))
+ // utterly meaningless, but useful for dealing with
+ // partially optimized code.
+ return I;
// Due to inheritance, this must be _after_ the global variable and undef
// checks
- if (Constant *con = dyn_cast<Constant>(I)) {
+ if (Constant *Con = dyn_cast<Constant>(I)) {
assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
"order of checks wrong!");
// Note: Finding a constant base for something marked for relocation
@@ -368,49 +340,30 @@ static Value *findBaseDefiningValue(Value *I) {
// off a potentially null value and have proven it null. We also use
// null pointers in dead paths of relocation phis (which we might later
// want to find a base pointer for).
- assert(con->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- assert(con->isNullValue() && "null is the only case which makes sense");
- return con;
+ assert(isa<ConstantPointerNull>(Con) &&
+ "null is the only case which makes sense");
+ return Con;
}
if (CastInst *CI = dyn_cast<CastInst>(I)) {
- Value *def = CI->stripPointerCasts();
- assert(def->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
+ Value *Def = CI->stripPointerCasts();
// If we find a cast instruction here, it means we've found a cast which is
// not simply a pointer cast (i.e. an inttoptr). We don't know how to
// handle int->ptr conversion.
- assert(!isa<CastInst>(def) && "shouldn't find another cast here");
- return findBaseDefiningValue(def);
+ assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
+ return findBaseDefiningValue(Def);
}
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (LI->getType()->isPointerTy()) {
- Value *Op = LI->getOperand(0);
- (void)Op;
- // Has to be a pointer to an gc object, or possibly an array of such?
- assert(Op->getType()->isPointerTy());
- return LI; // The value loaded is an gc base itself
- }
- }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
- Value *Op = GEP->getOperand(0);
- if (Op->getType()->isPointerTy()) {
- return findBaseDefiningValue(Op); // The base of this GEP is the base
- }
- }
+ if (isa<LoadInst>(I))
+ return I; // The value loaded is a gc base itself
- if (AllocaInst *alloc = dyn_cast<AllocaInst>(I)) {
- // An alloca represents a conceptual stack slot. It's the slot itself
- // that the GC needs to know about, not the value in the slot.
- assert(alloc->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- return alloc;
- }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ // The base of this GEP is the base
+ return findBaseDefiningValue(GEP->getPointerOperand());
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
+ case Intrinsic::experimental_gc_result_ptr:
default:
// fall through to general call handling
break;
@@ -418,11 +371,6 @@ static Value *findBaseDefiningValue(Value *I) {
case Intrinsic::experimental_gc_result_float:
case Intrinsic::experimental_gc_result_int:
llvm_unreachable("these don't produce pointers");
- case Intrinsic::experimental_gc_result_ptr:
- // This is just a special case of the CallInst check below to handle a
- // statepoint with deopt args which hasn't been rewritten for GC yet.
- // TODO: Assert that the statepoint isn't rewritten yet.
- return II;
case Intrinsic::experimental_gc_relocate: {
// Rerunning safepoint insertion after safepoints are already
// inserted is not supported. It could probably be made to work,
@@ -440,41 +388,27 @@ static Value *findBaseDefiningValue(Value *I) {
// We assume that functions in the source language only return base
// pointers. This should probably be generalized via attributes to support
// both source language and internal functions.
- if (CallInst *call = dyn_cast<CallInst>(I)) {
- assert(call->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- return call;
- }
- if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) {
- assert(invoke->getType()->isPointerTy() &&
- "Base for pointer must be another pointer");
- return invoke;
- }
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ return I;
// I have absolutely no idea how to implement this part yet. It's not
// neccessarily hard, I just haven't really looked at it yet.
assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
- if (AtomicCmpXchgInst *cas = dyn_cast<AtomicCmpXchgInst>(I)) {
+ if (isa<AtomicCmpXchgInst>(I))
// A CAS is effectively a atomic store and load combined under a
// predicate. From the perspective of base pointers, we just treat it
- // like a load. We loaded a pointer from a address in memory, that value
- // had better be a valid base pointer.
- return cas->getPointerOperand();
- }
- if (AtomicRMWInst *atomic = dyn_cast<AtomicRMWInst>(I)) {
- assert(AtomicRMWInst::Xchg == atomic->getOperation() &&
- "All others are binary ops which don't apply to base pointers");
- // semantically, a load, store pair. Treat it the same as a standard load
- return atomic->getPointerOperand();
- }
+ // like a load.
+ return I;
+
+ assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
+ "binary ops which don't apply to pointers");
// The aggregate ops. Aggregates can either be in the heap or on the
// stack, but in either case, this is simply a field load. As a result,
// this is a defining definition of the base just like a load is.
- if (ExtractValueInst *ev = dyn_cast<ExtractValueInst>(I)) {
- return ev;
- }
+ if (isa<ExtractValueInst>(I))
+ return I;
// We should never see an insert vector since that would require we be
// tracing back a struct value not a pointer value.
@@ -485,23 +419,21 @@ static Value *findBaseDefiningValue(Value *I) {
// return a value which dynamically selects from amoung several base
// derived pointers (each with it's own base potentially). It's the job of
// the caller to resolve these.
- if (SelectInst *select = dyn_cast<SelectInst>(I)) {
- return select;
- }
-
- return cast<PHINode>(I);
+ assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
+ "missing instruction case in findBaseDefiningValing");
+ return I;
}
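
(Illustrative aside, not part of the patch.) findBaseDefiningValue chases derived pointers back through value-forwarding operations until something that defines a base is reached. A hedged, self-contained sketch with a made-up Node type standing in for IR values:

// Minimal sketch of base chasing: follow casts and GEPs back to the value
// that defines the base. Node is a hypothetical stand-in, not an LLVM type.
#include <cassert>

struct Node {
  enum Kind { Alloc, Gep, Cast } kind;
  const Node *src; // pointer/cast operand; null for a base definition
};

const Node *findBase(const Node *v) {
  while (v->kind != Node::Alloc) // Alloc plays the role of a base definition
    v = v->src;                  // Gep and Cast both forward to their operand
  return v;
}

int main() {
  Node base{Node::Alloc, nullptr};
  Node gep{Node::Gep, &base};
  Node cast{Node::Cast, &gep};
  assert(findBase(&cast) == &base); // cast -> gep -> base
  return 0;
}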
/// Returns the base defining value for this value.
-static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) {
- Value *&Cached = cache[I];
+static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
+ Value *&Cached = Cache[I];
if (!Cached) {
Cached = findBaseDefiningValue(I);
}
- assert(cache[I] != nullptr);
+ assert(Cache[I] != nullptr);
if (TraceLSP) {
- errs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()
+ dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()
<< "\n";
}
return Cached;
@@ -509,25 +441,26 @@ static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) {
/// Return a base pointer for this value if known. Otherwise, return it's
/// base defining value.
-static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &cache) {
- Value *def = findBaseDefiningValueCached(I, cache);
- auto Found = cache.find(def);
- if (Found != cache.end()) {
+static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
+ Value *Def = findBaseDefiningValueCached(I, Cache);
+ auto Found = Cache.find(Def);
+ if (Found != Cache.end()) {
// Either a base-of relation, or a self reference. Caller must check.
return Found->second;
}
// Only a BDV available
- return def;
+ return Def;
}
/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
/// is it known to be a base pointer? Or do we need to continue searching.
-static bool isKnownBaseResult(Value *v) {
- if (!isa<PHINode>(v) && !isa<SelectInst>(v)) {
+static bool isKnownBaseResult(Value *V) {
+ if (!isa<PHINode>(V) && !isa<SelectInst>(V)) {
// no recursion possible
return true;
}
- if (cast<Instruction>(v)->getMetadata("is_base_value")) {
+ if (isa<Instruction>(V) &&
+ cast<Instruction>(V)->getMetadata("is_base_value")) {
// This is a previously inserted base phi or select. We know
// that this is a base value.
return true;
@@ -647,8 +580,7 @@ private:
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
/// relocation.) On failure, returns nullptr.
-static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
- DenseSet<llvm::Value *> &NewInsertedDefs) {
+static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
Value *def = findBaseOrBDV(I, cache);
if (isKnownBaseResult(def)) {
@@ -687,7 +619,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
done = true;
// Since we're adding elements to 'states' as we run, we can't keep
// iterators into the set.
- SmallVector<Value*, 16> Keys;
+ SmallVector<Value *, 16> Keys;
Keys.reserve(states.size());
for (auto Pair : states) {
Value *V = Pair.first;
@@ -777,7 +709,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
// We want to keep naming deterministic in the loop that follows, so
// sort the keys before iteration. This is useful in allowing us to
// write stable tests. Note that there is no invalidation issue here.
- SmallVector<Value*, 16> Keys;
+ SmallVector<Value *, 16> Keys;
Keys.reserve(states.size());
for (auto Pair : states) {
Value *V = Pair.first;
@@ -792,13 +724,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
if (!state.isConflict())
continue;
-
+
if (isa<PHINode>(v)) {
int num_preds =
std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));
assert(num_preds > 0 && "how did we reach here");
PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v);
- NewInsertedDefs.insert(phi);
// Add metadata marking this as a base value
auto *const_1 = ConstantInt::get(
Type::getInt32Ty(
@@ -815,7 +746,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
UndefValue *undef = UndefValue::get(sel->getType());
SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,
undef, "base_select", sel);
- NewInsertedDefs.insert(basesel);
// Add metadata marking this as a base value
auto *const_1 = ConstantInt::get(
Type::getInt32Ty(
@@ -838,7 +768,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
if (!state.isConflict())
continue;
-
+
if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
PHINode *phi = cast<PHINode>(v);
unsigned NumPHIValues = phi->getNumIncomingValues();
@@ -866,8 +796,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
assert(states.count(base));
base = states[base].getBase();
assert(base != nullptr && "unknown PhiState!");
- assert(NewInsertedDefs.count(base) &&
- "should have already added this in a prev. iteration!");
}
// In essense this assert states: the only way two
@@ -898,7 +826,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
if (base->getType() != basephi->getType()) {
base = new BitCastInst(base, basephi->getType(), "cast",
InBB->getTerminator());
- NewInsertedDefs.insert(base);
}
basephi->addIncoming(base, InBB);
}
@@ -924,7 +851,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
// The cast is needed since base traversal may strip away bitcasts
if (base->getType() != basesel->getType()) {
base = new BitCastInst(base, basesel->getType(), "cast", basesel);
- NewInsertedDefs.insert(base);
}
basesel->setOperand(i, base);
}
@@ -979,18 +905,18 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
// post condition: PointerToBase contains one (derived, base) pair for every
// pointer in live. Note that derived can be equal to base if the original
// pointer was a base pointer.
-static void findBasePointers(const StatepointLiveSetTy &live,
- DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
- DominatorTree *DT, DefiningValueMapTy &DVCache,
- DenseSet<llvm::Value *> &NewInsertedDefs) {
+static void
+findBasePointers(const StatepointLiveSetTy &live,
+ DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
+ DominatorTree *DT, DefiningValueMapTy &DVCache) {
// For the naming of values inserted to be deterministic - which makes for
// much cleaner and more stable tests - we need to assign an order to the
// live values. DenseSets do not provide a deterministic order across runs.
- SmallVector<Value*, 64> Temp;
+ SmallVector<Value *, 64> Temp;
Temp.insert(Temp.end(), live.begin(), live.end());
std::sort(Temp.begin(), Temp.end(), order_by_name);
for (Value *ptr : Temp) {
- Value *base = findBasePointer(ptr, DVCache, NewInsertedDefs);
+ Value *base = findBasePointer(ptr, DVCache);
assert(base && "failed to find base pointer");
PointerToBase[ptr] = base;
assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
@@ -1001,10 +927,10 @@ static void findBasePointers(const StatepointLiveSetTy &live,
// If you see this trip and like to live really dangerously, the code should
// be correct, just with idioms the verifier can't handle. You can try
// disabling the verifier at your own substaintial risk.
- assert(!isNullConstant(base) && "the relocation code needs adjustment to "
- "handle the relocation of a null pointer "
- "constant without causing false positives "
- "in the safepoint ir verifier.");
+ assert(!isa<ConstantPointerNull>(base) &&
+ "the relocation code needs adjustment to handle the relocation of "
+ "a null pointer constant without causing false positives in the "
+ "safepoint ir verifier.");
}
}
@@ -1014,14 +940,13 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
const CallSite &CS,
PartiallyConstructedSafepointRecord &result) {
DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
- DenseSet<llvm::Value *> NewInsertedDefs;
- findBasePointers(result.liveset, PointerToBase, &DT, DVCache, NewInsertedDefs);
+ findBasePointers(result.liveset, PointerToBase, &DT, DVCache);
if (PrintBasePointers) {
// Note: Need to print these in a stable order since this is checked in
// some tests.
errs() << "Base Pairs (w/o Relocation):\n";
- SmallVector<Value*, 64> Temp;
+ SmallVector<Value *, 64> Temp;
Temp.reserve(PointerToBase.size());
for (auto Pair : PointerToBase) {
Temp.push_back(Pair.first);
@@ -1029,90 +954,59 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
std::sort(Temp.begin(), Temp.end(), order_by_name);
for (Value *Ptr : Temp) {
Value *Base = PointerToBase[Ptr];
- errs() << " derived %" << Ptr->getName() << " base %"
- << Base->getName() << "\n";
+ errs() << " derived %" << Ptr->getName() << " base %" << Base->getName()
+ << "\n";
}
}
result.PointerToBase = PointerToBase;
- result.NewInsertedDefs = NewInsertedDefs;
}
-/// Check for liveness of items in the insert defs and add them to the live
-/// and base pointer sets
-static void fixupLiveness(DominatorTree &DT, const CallSite &CS,
- const DenseSet<Value *> &allInsertedDefs,
- PartiallyConstructedSafepointRecord &result) {
- Instruction *inst = CS.getInstruction();
-
- auto liveset = result.liveset;
- auto PointerToBase = result.PointerToBase;
+/// Given an updated version of the dataflow liveness results, update the
+/// liveset and base pointer maps for the call site CS.
+static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+ const CallSite &CS,
+ PartiallyConstructedSafepointRecord &result);
- auto is_live_gc_reference =
- [&](Value &V) { return isLiveGCReferenceAt(V, inst, DT, nullptr); };
-
- // For each new definition, check to see if a) the definition dominates the
- // instruction we're interested in, and b) one of the uses of that definition
- // is edge-reachable from the instruction we're interested in. This is the
- // same definition of liveness we used in the intial liveness analysis
- for (Value *newDef : allInsertedDefs) {
- if (liveset.count(newDef)) {
- // already live, no action needed
- continue;
- }
-
- // PERF: Use DT to check instruction domination might not be good for
- // compilation time, and we could change to optimal solution if this
- // turn to be a issue
- if (!DT.dominates(cast<Instruction>(newDef), inst)) {
- // can't possibly be live at inst
- continue;
- }
-
- if (is_live_gc_reference(*newDef)) {
- // Add the live new defs into liveset and PointerToBase
- liveset.insert(newDef);
- PointerToBase[newDef] = newDef;
- }
- }
-
- result.liveset = liveset;
- result.PointerToBase = PointerToBase;
-}
-
-static void fixupLiveReferences(
- Function &F, DominatorTree &DT, Pass *P,
- const DenseSet<llvm::Value *> &allInsertedDefs,
- ArrayRef<CallSite> toUpdate,
+static void recomputeLiveInValues(
+ Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+ // TODO-PERF: reuse the original liveness, then simply run the dataflow
+ // again. The old values are still live and will help it stabilize quickly.
+ GCPtrLivenessData RevisedLivenessData;
+ computeLiveInValues(DT, F, RevisedLivenessData);
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
const CallSite &CS = toUpdate[i];
- fixupLiveness(DT, CS, allInsertedDefs, info);
+ recomputeLiveInValues(RevisedLivenessData, CS, info);
}
}
-// Normalize basic block to make it ready to be target of invoke statepoint.
-// It means spliting it to have single predecessor. Return newly created BB
-// ready to be successor of invoke statepoint.
-static BasicBlock *normalizeBBForInvokeSafepoint(BasicBlock *BB,
- BasicBlock *InvokeParent,
- Pass *P) {
- BasicBlock *ret = BB;
-
+// When inserting gc.relocate calls, we need to ensure there are no uses
+// of the original value between the gc.statepoint and the gc.relocate call.
+// One case which can arise is a phi node at the start of one of the successor blocks.
+// We also need to be able to insert the gc.relocates only on the path which
+// goes through the statepoint. We might need to split an edge to make this
+// possible.
+static BasicBlock *
+normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) {
+ DominatorTree *DT = nullptr;
+ if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTP->getDomTree();
+
+ BasicBlock *Ret = BB;
if (!BB->getUniquePredecessor()) {
- ret = SplitBlockPredecessors(BB, InvokeParent, "");
+ Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT);
}
- // Another requirement for such basic blocks is to not have any phi nodes.
- // Since we just ensured that new BB will have single predecessor,
- // all phi nodes in it will have one value. Here it would be naturall place
- // to
- // remove them all. But we can not do this because we are risking to remove
- // one of the values stored in liveset of another statepoint. We will do it
- // later after placing all safepoints.
+ // Now that Ret has a unique predecessor we can safely remove all phi nodes
+ // from it.
+ FoldSingleEntryPHINodes(Ret);
+ assert(!isa<PHINode>(Ret->begin()));
- return ret;
+ // At this point, we can safely insert a gc.relocate as the first instruction
+ // in Ret if needed.
+ return Ret;
}
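
(Illustrative aside, not part of the patch.) The normalization above splits an edge so the block where gc.relocates will be inserted has a single predecessor, after which any phi in it has one incoming value and can be folded. A rough stand-alone sketch over a toy CFG; block names and the splitEdge helper are hypothetical:

// Toy model of edge splitting: route the invoke->normal edge through a fresh
// block so the insertion block has exactly one predecessor.
#include <cassert>
#include <map>
#include <string>
#include <vector>

using CFG = std::map<std::string, std::vector<std::string>>; // block -> succs

void splitEdge(CFG &G, const std::string &From, const std::string &To,
               const std::string &NewBB) {
  for (std::string &S : G[From])
    if (S == To)
      S = NewBB;   // redirect the edge through the new block
  G[NewBB] = {To}; // new block falls through to the original target
}

int main() {
  CFG G = {{"invoke", {"normal"}}, {"other", {"normal"}}, {"normal", {}}};
  splitEdge(G, "invoke", "normal", "normal.split");
  // "normal.split" now has a single predecessor ("invoke"), so a phi placed
  // in it would have one incoming value and could be folded away.
  assert(G["invoke"][0] == "normal.split");
  assert(G["normal.split"][0] == "normal");
  return 0;
}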
static int find_index(ArrayRef<Value *> livevec, Value *val) {
@@ -1180,7 +1074,7 @@ static void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables,
// combination. This results is some blow up the function declarations in
// the IR, but removes the need for argument bitcasts which shrinks the IR
// greatly and makes it much more readable.
- SmallVector<Type *, 1> types; // one per 'any' type
+ SmallVector<Type *, 1> types; // one per 'any' type
types.push_back(liveVariables[i]->getType()); // result type
Value *gc_relocate_decl = Intrinsic::getDeclaration(
M, Intrinsic::experimental_gc_relocate, types);
@@ -1298,8 +1192,10 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
token = invoke;
// Generate gc relocates in exceptional path
- BasicBlock *unwindBlock = normalizeBBForInvokeSafepoint(
- toReplace->getUnwindDest(), invoke->getParent(), P);
+ BasicBlock *unwindBlock = toReplace->getUnwindDest();
+ assert(!isa<PHINode>(unwindBlock->begin()) &&
+ unwindBlock->getUniquePredecessor() &&
+ "can't safely insert in this block!");
Instruction *IP = &*(unwindBlock->getFirstInsertionPt());
Builder.SetInsertPoint(IP);
@@ -1319,8 +1215,10 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
exceptional_token, Builder);
// Generate gc relocates and returns for normal block
- BasicBlock *normalDest = normalizeBBForInvokeSafepoint(
- toReplace->getNormalDest(), invoke->getParent(), P);
+ BasicBlock *normalDest = toReplace->getNormalDest();
+ assert(!isa<PHINode>(normalDest->begin()) &&
+ normalDest->getUniquePredecessor() &&
+ "can't safely insert in this block!");
IP = &*(normalDest->getFirstInsertionPt());
Builder.SetInsertPoint(IP);
@@ -1333,7 +1231,7 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
// Take the name of the original value call if it had one.
token->takeName(CS.getInstruction());
- // The GCResult is already inserted, we just need to find it
+// The GCResult is already inserted, we just need to find it
#ifndef NDEBUG
Instruction *toReplace = CS.getInstruction();
assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
@@ -1351,7 +1249,6 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
// Second, create a gc.relocate for every live variable
CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder);
-
}
namespace {
@@ -1383,7 +1280,7 @@ static void stablize_order(SmallVectorImpl<Value *> &basevec,
// Replace an existing gc.statepoint with a new one and a set of gc.relocates
// which make the relocations happening at this safepoint explicit.
-//
+//
// WARNING: Does not do any fixup to adjust users of the original live
// values. That's the callers responsibility.
static void
@@ -1458,14 +1355,13 @@ static void relocationViaAlloca(
Function &F, DominatorTree &DT, ArrayRef<Value *> live,
ArrayRef<struct PartiallyConstructedSafepointRecord> records) {
#ifndef NDEBUG
- int initialAllocaNum = 0;
-
- // record initial number of allocas
- for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end;
- itr++) {
- if (isa<AllocaInst>(*itr))
- initialAllocaNum++;
- }
+ // record initial number of (static) allocas; we'll check we have the same
+ // number when we get done.
+ int InitialAllocaNum = 0;
+ for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
+ I++)
+ if (isa<AllocaInst>(*I))
+ InitialAllocaNum++;
#endif
// TODO-PERF: change data structures, reserve
@@ -1505,47 +1401,49 @@ static void relocationViaAlloca(
// In case if it was invoke statepoint
// we will insert stores for exceptional path gc relocates.
if (isa<InvokeInst>(Statepoint)) {
- insertRelocationStores(info.UnwindToken->users(),
- allocaMap, visitedLiveValues);
+ insertRelocationStores(info.UnwindToken->users(), allocaMap,
+ visitedLiveValues);
}
-#ifndef NDEBUG
- // As a debuging aid, pretend that an unrelocated pointer becomes null at
- // the gc.statepoint. This will turn some subtle GC problems into slightly
- // easier to debug SEGVs
- SmallVector<AllocaInst *, 64> ToClobber;
- for (auto Pair : allocaMap) {
- Value *Def = Pair.first;
- AllocaInst *Alloca = cast<AllocaInst>(Pair.second);
-
- // This value was relocated
- if (visitedLiveValues.count(Def)) {
- continue;
+ if (ClobberNonLive) {
+ // As a debugging aid, pretend that an unrelocated pointer becomes null at
+ // the gc.statepoint. This will turn some subtle GC problems into
+ // slightly easier to debug SEGVs. Note that on large IR files with
+ // lots of gc.statepoints this is extremely costly in both memory and
+ // time.
+ SmallVector<AllocaInst *, 64> ToClobber;
+ for (auto Pair : allocaMap) {
+ Value *Def = Pair.first;
+ AllocaInst *Alloca = cast<AllocaInst>(Pair.second);
+
+ // This value was relocated
+ if (visitedLiveValues.count(Def)) {
+ continue;
+ }
+ ToClobber.push_back(Alloca);
}
- ToClobber.push_back(Alloca);
- }
- auto InsertClobbersAt = [&](Instruction *IP) {
- for (auto *AI : ToClobber) {
- auto AIType = cast<PointerType>(AI->getType());
- auto PT = cast<PointerType>(AIType->getElementType());
- Constant *CPN = ConstantPointerNull::get(PT);
- StoreInst *store = new StoreInst(CPN, AI);
- store->insertBefore(IP);
- }
- };
+ auto InsertClobbersAt = [&](Instruction *IP) {
+ for (auto *AI : ToClobber) {
+ auto AIType = cast<PointerType>(AI->getType());
+ auto PT = cast<PointerType>(AIType->getElementType());
+ Constant *CPN = ConstantPointerNull::get(PT);
+ StoreInst *store = new StoreInst(CPN, AI);
+ store->insertBefore(IP);
+ }
+ };
- // Insert the clobbering stores. These may get intermixed with the
- // gc.results and gc.relocates, but that's fine.
- if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
- InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
- InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
- } else {
- BasicBlock::iterator Next(cast<CallInst>(Statepoint));
- Next++;
- InsertClobbersAt(Next);
+ // Insert the clobbering stores. These may get intermixed with the
+ // gc.results and gc.relocates, but that's fine.
+ if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
+ InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
+ InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
+ } else {
+ BasicBlock::iterator Next(cast<CallInst>(Statepoint));
+ Next++;
+ InsertClobbersAt(Next);
+ }
}
-#endif
}
// update use with load allocas and add store for gc_relocated
for (auto Pair : allocaMap) {
@@ -1603,11 +1501,11 @@ static void relocationViaAlloca(
assert(!inst->isTerminator() &&
"The only TerminatorInst that can produce a value is "
"InvokeInst which is handled above.");
- store->insertAfter(inst);
+ store->insertAfter(inst);
}
} else {
assert((isa<Argument>(def) || isa<GlobalVariable>(def) ||
- (isa<Constant>(def) && cast<Constant>(def)->isNullValue())) &&
+ isa<ConstantPointerNull>(def)) &&
"Must be argument or global");
store->insertAfter(cast<Instruction>(alloca));
}
@@ -1621,12 +1519,11 @@ static void relocationViaAlloca(
}
#ifndef NDEBUG
- for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end;
- itr++) {
- if (isa<AllocaInst>(*itr))
- initialAllocaNum--;
- }
- assert(initialAllocaNum == 0 && "We must not introduce any extra allocas");
+ for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
+ I++)
+ if (isa<AllocaInst>(*I))
+ InitialAllocaNum--;
+ assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
#endif
}
@@ -1647,76 +1544,155 @@ template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
}
}
-static Function *getUseHolder(Module &M) {
- FunctionType *ftype =
- FunctionType::get(Type::getVoidTy(M.getContext()), true);
- Function *Func = cast<Function>(M.getOrInsertFunction("__tmp_use", ftype));
- return Func;
-}
-
/// Insert holders so that each Value is obviously live through the entire
-/// liftetime of the call.
+/// lifetime of the call.
static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
- SmallVectorImpl<CallInst *> &holders) {
+ SmallVectorImpl<CallInst *> &Holders) {
+ if (Values.empty())
+ // No values to hold live, might as well not insert the empty holder
+ return;
+
Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
- Function *Func = getUseHolder(*M);
+ // Use a dummy vararg function to actually hold the values live
+ Function *Func = cast<Function>(M->getOrInsertFunction(
+ "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
if (CS.isCall()) {
// For call safepoints insert dummy calls right after safepoint
- BasicBlock::iterator next(CS.getInstruction());
- next++;
- CallInst *base_holder = CallInst::Create(Func, Values, "", next);
- holders.push_back(base_holder);
- } else if (CS.isInvoke()) {
- // For invoke safepooints insert dummy calls both in normal and
- // exceptional destination blocks
- InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
- CallInst *normal_holder = CallInst::Create(
- Func, Values, "", invoke->getNormalDest()->getFirstInsertionPt());
- CallInst *unwind_holder = CallInst::Create(
- Func, Values, "", invoke->getUnwindDest()->getFirstInsertionPt());
- holders.push_back(normal_holder);
- holders.push_back(unwind_holder);
- } else
- llvm_unreachable("unsupported call type");
+ BasicBlock::iterator Next(CS.getInstruction());
+ Next++;
+ Holders.push_back(CallInst::Create(Func, Values, "", Next));
+ return;
+ }
+ // For invoke safepoints, insert dummy calls in both the normal and
+ // exceptional destination blocks
+ auto *II = cast<InvokeInst>(CS.getInstruction());
+ Holders.push_back(CallInst::Create(
+ Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
+ Holders.push_back(CallInst::Create(
+ Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
}
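
(Illustrative aside, not part of the patch.) The holder inserted above is simply a variadic call whose only purpose is to mention the values, keeping them obviously live until it is removed. A generic C++ sketch of the same idea; holdLive is a hypothetical stand-in for the __tmp_use call:

// Sketch of a "use holder": a variadic no-op that mentions the values.
#include <cassert>

template <typename... Ts>
void holdLive(Ts &&...) {} // deliberately does nothing with its arguments

int main() {
  int base = 42;
  int *derived = &base;
  holdLive(base, derived); // analogous to the __tmp_use holder after a call
  assert(*derived == 42);
  return 0;
}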
static void findLiveReferences(
Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+ GCPtrLivenessData OriginalLivenessData;
+ computeLiveInValues(DT, F, OriginalLivenessData);
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
const CallSite &CS = toUpdate[i];
- analyzeParsePointLiveness(DT, CS, info);
+ analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info);
}
}
-static void addBasesAsLiveValues(StatepointLiveSetTy &liveset,
- DenseMap<Value *, Value *> &PointerToBase) {
- // Identify any base pointers which are used in this safepoint, but not
- // themselves relocated. We need to relocate them so that later inserted
- // safepoints can get the properly relocated base register.
- DenseSet<Value *> missing;
- for (Value *L : liveset) {
- assert(PointerToBase.find(L) != PointerToBase.end());
- Value *base = PointerToBase[L];
- assert(base);
- if (liveset.find(base) == liveset.end()) {
- assert(PointerToBase.find(base) == PointerToBase.end());
- // uniqued by set insert
- missing.insert(base);
+/// Remove any vector of pointers from the liveset by scalarizing them over the
+/// statepoint instruction. Adds the scalarized pieces to the liveset. It
+/// would be preferable to include the vector in the statepoint itself, but
+/// the lowering code currently does not handle that. Extending it would be
+/// slightly non-trivial since it requires a format change. Given how rare
+/// such cases are (for the moment?), scalarizing is an acceptable compromise.
+static void splitVectorValues(Instruction *StatepointInst,
+ StatepointLiveSetTy &LiveSet, DominatorTree &DT) {
+ SmallVector<Value *, 16> ToSplit;
+ for (Value *V : LiveSet)
+ if (isa<VectorType>(V->getType()))
+ ToSplit.push_back(V);
+
+ if (ToSplit.empty())
+ return;
+
+ Function &F = *(StatepointInst->getParent()->getParent());
+
+ DenseMap<Value *, AllocaInst *> AllocaMap;
+ // First is normal return, second is exceptional return (invoke only)
+ DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
+ for (Value *V : ToSplit) {
+ LiveSet.erase(V);
+
+ AllocaInst *Alloca =
+ new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
+ AllocaMap[V] = Alloca;
+
+ VectorType *VT = cast<VectorType>(V->getType());
+ IRBuilder<> Builder(StatepointInst);
+ SmallVector<Value *, 16> Elements;
+ for (unsigned i = 0; i < VT->getNumElements(); i++)
+ Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
+ LiveSet.insert(Elements.begin(), Elements.end());
+
+ auto InsertVectorReform = [&](Instruction *IP) {
+ Builder.SetInsertPoint(IP);
+ Builder.SetCurrentDebugLocation(IP->getDebugLoc());
+ Value *ResultVec = UndefValue::get(VT);
+ for (unsigned i = 0; i < VT->getNumElements(); i++)
+ ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i],
+ Builder.getInt32(i));
+ return ResultVec;
+ };
+
+ if (isa<CallInst>(StatepointInst)) {
+ BasicBlock::iterator Next(StatepointInst);
+ Next++;
+ Instruction *IP = &*(Next);
+ Replacements[V].first = InsertVectorReform(IP);
+ Replacements[V].second = nullptr;
+ } else {
+ InvokeInst *Invoke = cast<InvokeInst>(StatepointInst);
+ // We've already normalized - check that we don't have shared destination
+ // blocks
+ BasicBlock *NormalDest = Invoke->getNormalDest();
+ assert(!isa<PHINode>(NormalDest->begin()));
+ BasicBlock *UnwindDest = Invoke->getUnwindDest();
+ assert(!isa<PHINode>(UnwindDest->begin()));
+ // Insert insert element sequences in both successors
+ Instruction *IP = &*(NormalDest->getFirstInsertionPt());
+ Replacements[V].first = InsertVectorReform(IP);
+ IP = &*(UnwindDest->getFirstInsertionPt());
+ Replacements[V].second = InsertVectorReform(IP);
}
}
+ for (Value *V : ToSplit) {
+ AllocaInst *Alloca = AllocaMap[V];
+
+ // Capture all users before we start mutating use lists
+ SmallVector<Instruction *, 16> Users;
+ for (User *U : V->users())
+ Users.push_back(cast<Instruction>(U));
+
+ for (Instruction *I : Users) {
+ if (auto Phi = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++)
+ if (V == Phi->getIncomingValue(i)) {
+ LoadInst *Load = new LoadInst(
+ Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
+ Phi->setIncomingValue(i, Load);
+ }
+ } else {
+ LoadInst *Load = new LoadInst(Alloca, "", I);
+ I->replaceUsesOfWith(V, Load);
+ }
+ }
- // Note that we want these at the end of the list, otherwise
- // register placement gets screwed up once we lower to STATEPOINT
- // instructions. This is an utter hack, but there doesn't seem to be a
- // better one.
- for (Value *base : missing) {
- assert(base);
- liveset.insert(base);
- PointerToBase[base] = base;
- }
- assert(liveset.size() == PointerToBase.size());
+ // Store the original value and the replacement value into the alloca
+ StoreInst *Store = new StoreInst(V, Alloca);
+ if (auto I = dyn_cast<Instruction>(V))
+ Store->insertAfter(I);
+ else
+ Store->insertAfter(Alloca);
+
+ // Normal return for invoke, or call return
+ Instruction *Replacement = cast<Instruction>(Replacements[V].first);
+ (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
+ // Unwind return for invoke only
+ Replacement = cast_or_null<Instruction>(Replacements[V].second);
+ if (Replacement)
+ (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
+ }
+
+ // apply mem2reg to promote alloca to SSA
+ SmallVector<AllocaInst *, 16> Allocas;
+ for (Value *V : ToSplit)
+ Allocas.push_back(AllocaMap[V]);
+ PromoteMemToReg(Allocas, DT);
}
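
(Illustrative aside, not part of the patch.) splitVectorValues extracts every lane before the statepoint, lets the scalar pieces be relocated individually, and rebuilds the vector afterwards. A minimal stand-alone sketch with std::array standing in for an IR vector:

// Toy version of the extract/reform pattern used by splitVectorValues.
#include <array>
#include <cassert>

int main() {
  std::array<int, 4> vec = {10, 20, 30, 40};

  // "extractelement" each lane; the scalars join the live set individually.
  std::array<int, 4> lanes{};
  for (unsigned i = 0; i < vec.size(); ++i)
    lanes[i] = vec[i];

  // ... statepoint: each scalar lane may be relocated independently ...

  // Reform the vector from the lanes after the statepoint.
  std::array<int, 4> reformed{};
  for (unsigned i = 0; i < lanes.size(); ++i)
    reformed[i] = lanes[i];

  assert(reformed == vec);
  return 0;
}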
static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
@@ -1734,6 +1710,20 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
}
#endif
+ // When inserting gc.relocates for invokes, we need to be able to insert at
+ // the top of the successor blocks. See the comment on
+ // normalizeForInvokeSafepoint for exactly what is needed. Note that this step
+ // may restructure the CFG.
+ for (CallSite CS : toUpdate) {
+ if (!CS.isInvoke())
+ continue;
+ InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
+ normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(),
+ P);
+ normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(),
+ P);
+ }
+
// A list of dummy calls added to the IR to keep various values obviously
// live in the IR. We'll remove all of these when done.
SmallVector<CallInst *, 64> holders;
@@ -1749,7 +1739,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
SmallVector<Value *, 64> DeoptValues;
for (Use &U : StatepointCS.vm_state_args()) {
Value *Arg = cast<Value>(&U);
- if (isGCPointerType(Arg->getType()))
+ assert(!isUnhandledGCPointerType(Arg->getType()) &&
+ "support for FCA unimplemented");
+ if (isHandledGCPointerType(Arg->getType()))
DeoptValues.push_back(Arg);
}
insertUseHolderAfter(CS, DeoptValues, holders);
@@ -1767,6 +1759,17 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// site.
findLiveReferences(F, DT, P, toUpdate, records);
+ // Do a limited scalarization of any live at safepoint vector values which
+ // contain pointers. This enables this pass to run after vectorization at
+ // the cost of some possible performance loss. TODO: it would be nice to
+ // natively support vectors all the way through the backend so we don't need
+ // to scalarize here.
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ Instruction *statepoint = toUpdate[i].getInstruction();
+ splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT);
+ }
+
// B) Find the base pointers for each live pointer
/* scope for caching */ {
// Cache the 'defining value' relation used in the computation and
@@ -1790,13 +1793,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// gep a + 1
// safepoint 2
// br loop
- DenseSet<llvm::Value *> allInsertedDefs;
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- allInsertedDefs.insert(info.NewInsertedDefs.begin(),
- info.NewInsertedDefs.end());
- }
-
// We insert some dummy calls after each safepoint to definitely hold live
// the base pointers which were identified for that safepoint. We'll then
// ask liveness for _every_ base inserted to see what is now live. Then we
@@ -1813,22 +1809,11 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
insertUseHolderAfter(CS, Bases, holders);
}
- // Add the bases explicitly to the live vector set. This may result in a few
- // extra relocations, but the base has to be available whenever a pointer
- // derived from it is used. Thus, we need it to be part of the statepoint's
- // gc arguments list. TODO: Introduce an explicit notion (in the following
- // code) of the GC argument list as seperate from the live Values at a
- // given statepoint.
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- addBasesAsLiveValues(info.liveset, info.PointerToBase);
- }
+ // By selecting base pointers, we've effectively inserted new uses. Thus, we
+ // need to rerun liveness. We may *also* have inserted new defs, but that's
+ // not the key issue.
+ recomputeLiveInValues(F, DT, P, toUpdate, records);
- // If we inserted any new values, we need to adjust our notion of what is
- // live at a particular safepoint.
- if (!allInsertedDefs.empty()) {
- fixupLiveReferences(F, DT, P, allInsertedDefs, toUpdate, records);
- }
if (PrintBasePointers) {
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
@@ -1858,25 +1843,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
}
toUpdate.clear(); // prevent accident use of invalid CallSites
- // In case if we inserted relocates in a different basic block than the
- // original safepoint (this can happen for invokes). We need to be sure that
- // original values were not used in any of the phi nodes at the
- // beginning of basic block containing them. Because we know that all such
- // blocks will have single predecessor we can safely assume that all phi
- // nodes have single entry (because of normalizeBBForInvokeSafepoint).
- // Just remove them all here.
- for (size_t i = 0; i < records.size(); i++) {
- Instruction *I = records[i].StatepointToken;
-
- if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) {
- FoldSingleEntryPHINodes(invoke->getNormalDest());
- assert(!isa<PHINode>(invoke->getNormalDest()->begin()));
-
- FoldSingleEntryPHINodes(invoke->getUnwindDest());
- assert(!isa<PHINode>(invoke->getUnwindDest()->begin()));
- }
- }
-
// Do all the fixups of the original live variables to their relocated selves
SmallVector<Value *, 128> live;
for (size_t i = 0; i < records.size(); i++) {
@@ -1889,6 +1855,24 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
Statepoint statepoint(info.StatepointToken);
live.insert(live.end(), statepoint.gc_args_begin(),
statepoint.gc_args_end());
+#ifndef NDEBUG
+ // Do some basic sanity checks on our liveness results before performing
+ // relocation. Relocation can and will turn mistakes in liveness results
+ // into nonsensical code which is much harder to debug.
+ // TODO: It would be nice to test consistency as well
+ assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
+ "statepoint must be reachable or liveness is meaningless");
+ for (Value *V : statepoint.gc_args()) {
+ if (!isa<Instruction>(V))
+ // Non-instruction values trivially dominate all possible uses
+ continue;
+ auto LiveInst = cast<Instruction>(V);
+ assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
+ "unreachable values should never be live");
+ assert(DT.dominates(LiveInst, info.StatepointToken) &&
+ "basic SSA liveness expectation violated by liveness analysis");
+ }
+#endif
}
unique_unsorted(live);
@@ -1924,18 +1908,285 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
if (!shouldRewriteStatepointsIn(F))
return false;
- // Gather all the statepoints which need rewritten.
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ // Gather all the statepoints which need to be rewritten. Be careful to only
+ // consider those in reachable code since we need to ask dominance queries
+ // when rewriting. We'll delete the unreachable ones in a moment.
SmallVector<CallSite, 64> ParsePointNeeded;
+ bool HasUnreachableStatepoint = false;
for (Instruction &I : inst_range(F)) {
// TODO: only the ones with the flag set!
- if (isStatepoint(I))
- ParsePointNeeded.push_back(CallSite(&I));
+ if (isStatepoint(I)) {
+ if (DT.isReachableFromEntry(I.getParent()))
+ ParsePointNeeded.push_back(CallSite(&I));
+ else
+ HasUnreachableStatepoint = true;
+ }
}
+ bool MadeChange = false;
+
+ // Delete any unreachable statepoints so that we don't have unrewritten
+ // statepoints surviving this pass. This makes testing easier and the
+ // resulting IR less confusing to human readers. Rather than be fancy, we
+ // just reuse a utility function which removes the unreachable blocks.
+ if (HasUnreachableStatepoint)
+ MadeChange |= removeUnreachableBlocks(F);
+
// Return early if no work to do.
if (ParsePointNeeded.empty())
- return false;
+ return MadeChange;
+
+ // As a prepass, go ahead and aggressively destroy single entry phi nodes.
+ // These are created by LCSSA. They have the effect of increasing the size
+ // of liveness sets for no good reason. It may be harder to do this post
+ // insertion since relocations and base phis can confuse things.
+ for (BasicBlock &BB : F)
+ if (BB.getUniquePredecessor()) {
+ MadeChange = true;
+ FoldSingleEntryPHINodes(&BB);
+ }
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return insertParsePoints(F, DT, this, ParsePointNeeded);
+ MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded);
+ return MadeChange;
+}
+
+// liveness computation via standard dataflow
+// -------------------------------------------------------------------
+
+// TODO: Consider using bitvectors for liveness, the set of potentially
+// interesting values should be small and easy to pre-compute.
+
+/// Is this value a constant consisting entirely of null values?
+static bool isConstantNull(Value *V) {
+ return isa<Constant>(V) && cast<Constant>(V)->isNullValue();
+}
+
+/// Compute the live-in set for the location rbegin starting from
+/// the live-out set of the basic block
+static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
+ BasicBlock::reverse_iterator rend,
+ DenseSet<Value *> &LiveTmp) {
+
+ for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
+ Instruction *I = &*ritr;
+
+ // KILL/Def - Remove this definition from LiveIn
+ LiveTmp.erase(I);
+
+ // Don't consider *uses* in PHI nodes, we handle their contribution to
+ // predecessor blocks when we seed the LiveOut sets
+ if (isa<PHINode>(I))
+ continue;
+
+ // USE - Add to the LiveIn set for this instruction
+ for (Value *V : I->operands()) {
+ assert(!isUnhandledGCPointerType(V->getType()) &&
+ "support for FCA unimplemented");
+ if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) &&
+ !isa<UndefValue>(V)) {
+ // The choice to exclude null and undef is arbitrary here. Reconsider?
+ LiveTmp.insert(V);
+ }
+ }
+ }
+}
+
+static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) {
+
+ for (BasicBlock *Succ : successors(BB)) {
+ const BasicBlock::iterator E(Succ->getFirstNonPHI());
+ for (BasicBlock::iterator I = Succ->begin(); I != E; I++) {
+ PHINode *Phi = cast<PHINode>(&*I);
+ Value *V = Phi->getIncomingValueForBlock(BB);
+ assert(!isUnhandledGCPointerType(V->getType()) &&
+ "support for FCA unimplemented");
+ if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) &&
+ !isa<UndefValue>(V)) {
+ // The choice to exclude null and undef is arbitrary here. Reconsider?
+ LiveTmp.insert(V);
+ }
+ }
+ }
+}
+
+static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
+ DenseSet<Value *> KillSet;
+ for (Instruction &I : *BB)
+ if (isHandledGCPointerType(I.getType()))
+ KillSet.insert(&I);
+ return KillSet;
+}
+
+#ifndef NDEBUG
+/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
+/// sanity check for the liveness computation.
+static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live,
+ TerminatorInst *TI, bool TermOkay = false) {
+ for (Value *V : Live) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // The terminator can be a member of the LiveOut set. LLVM's definition
+ // of instruction dominance states that V does not dominate itself. As
+ // such, we need to special case this to allow it.
+ if (TermOkay && TI == I)
+ continue;
+ assert(DT.dominates(I, TI) &&
+ "basic SSA liveness expectation violated by liveness analysis");
+ }
+ }
+}
+
+/// Check that all the liveness sets used during the computation of liveness
+/// obey basic SSA properties. This is useful for finding cases where we miss
+/// a def.
+static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
+ BasicBlock &BB) {
+ checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
+ checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
+ checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
+}
+#endif
+
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+ GCPtrLivenessData &Data) {
+
+ SmallSetVector<BasicBlock *, 200> Worklist;
+ auto AddPredsToWorklist = [&](BasicBlock *BB) {
+ // We use a SetVector so that we don't have duplicates in the worklist.
+ Worklist.insert(pred_begin(BB), pred_end(BB));
+ };
+ auto NextItem = [&]() {
+ BasicBlock *BB = Worklist.back();
+ Worklist.pop_back();
+ return BB;
+ };
+
+ // Seed the liveness for each individual block
+ for (BasicBlock &BB : F) {
+ Data.KillSet[&BB] = computeKillSet(&BB);
+ Data.LiveSet[&BB].clear();
+ computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);
+
+#ifndef NDEBUG
+ for (Value *Kill : Data.KillSet[&BB])
+ assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
+#endif
+
+ Data.LiveOut[&BB] = DenseSet<Value *>();
+ computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
+ Data.LiveIn[&BB] = Data.LiveSet[&BB];
+ set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
+ set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
+ if (!Data.LiveIn[&BB].empty())
+ AddPredsToWorklist(&BB);
+ }
+
+ // Propagate that liveness until stable
+ while (!Worklist.empty()) {
+ BasicBlock *BB = NextItem();
+
+ // Compute our new liveout set, then exit early if it hasn't changed
+ // despite the contribution of our successor.
+ DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+ const auto OldLiveOutSize = LiveOut.size();
+ for (BasicBlock *Succ : successors(BB)) {
+ assert(Data.LiveIn.count(Succ));
+ set_union(LiveOut, Data.LiveIn[Succ]);
+ }
+ // assert: Data.LiveOut[BB] is a subset of LiveOut
+ if (OldLiveOutSize == LiveOut.size()) {
+ // If the sets are the same size, then we didn't actually add anything
+ // when unioning our successors' LiveIn. Thus, the LiveIn of this block
+ // hasn't changed.
+ continue;
+ }
+ Data.LiveOut[BB] = LiveOut;
+
+ // Apply the effects of this basic block
+ DenseSet<Value *> LiveTmp = LiveOut;
+ set_union(LiveTmp, Data.LiveSet[BB]);
+ set_subtract(LiveTmp, Data.KillSet[BB]);
+
+ assert(Data.LiveIn.count(BB));
+ const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
+ // assert: OldLiveIn is a subset of LiveTmp
+ if (OldLiveIn.size() != LiveTmp.size()) {
+ Data.LiveIn[BB] = LiveTmp;
+ AddPredsToWorklist(BB);
+ }
+ } // while( !worklist.empty() )
+
+#ifndef NDEBUG
+ // Sanity check our output against SSA properties. This helps catch any
+ // missing kills during the above iteration.
+ for (BasicBlock &BB : F) {
+ checkBasicSSA(DT, Data, BB);
+ }
+#endif
+}
+
+static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
+ StatepointLiveSetTy &Out) {
+
+ BasicBlock *BB = Inst->getParent();
+
+ // Note: The copy is intentional and required
+ assert(Data.LiveOut.count(BB));
+ DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+
+ // We want to handle the statepoint itself oddly. Its
+ // call result is not live (normal), nor are its arguments
+ // (unless they're used again later). This adjustment is
+ // specifically what we need to relocate.
+ BasicBlock::reverse_iterator rend(Inst);
+ computeLiveInValues(BB->rbegin(), rend, LiveOut);
+ LiveOut.erase(Inst);
+ Out.insert(LiveOut.begin(), LiveOut.end());
+}
+
+static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+ const CallSite &CS,
+ PartiallyConstructedSafepointRecord &Info) {
+ Instruction *Inst = CS.getInstruction();
+ StatepointLiveSetTy Updated;
+ findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
+
+#ifndef NDEBUG
+ DenseSet<Value *> Bases;
+ for (auto KVPair : Info.PointerToBase) {
+ Bases.insert(KVPair.second);
+ }
+#endif
+ // We may have base pointers which are now live that weren't before. We need
+ // to update the PointerToBase structure to reflect this.
+ for (auto V : Updated)
+ if (!Info.PointerToBase.count(V)) {
+ assert(Bases.count(V) && "can't find base for unexpected live value");
+ Info.PointerToBase[V] = V;
+ continue;
+ }
+
+#ifndef NDEBUG
+ for (auto V : Updated) {
+ assert(Info.PointerToBase.count(V) &&
+ "must be able to find base for live value");
+ }
+#endif
+
+ // Remove any stale base mappings - this can happen since our liveness is
+ // more precise than the one inherent in the base pointer analysis
+ DenseSet<Value *> ToErase;
+ for (auto KVPair : Info.PointerToBase)
+ if (!Updated.count(KVPair.first))
+ ToErase.insert(KVPair.first);
+ for (auto V : ToErase)
+ Info.PointerToBase.erase(V);
+
+#ifndef NDEBUG
+ for (auto KVPair : Info.PointerToBase)
+ assert(Updated.count(KVPair.first) && "record for non-live value");
+#endif
+
+ Info.liveset = Updated;
}
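
The liveness code added above is a standard backwards dataflow fixed point, specialized to GC pointers and seeded through PHI uses. Stripped of those details, the scheme it implements looks roughly like the following sketch (integer IDs stand in for llvm::Value pointers; Block, Gen and Kill are illustrative names, not the pass's types):

    #include <map>
    #include <set>
    #include <vector>

    // One block of a CFG, with explicit edges and the per-block gen/kill
    // summaries used by the transfer function.
    struct Block {
      std::vector<Block *> Succs, Preds;
      std::set<int> Gen;   // values used in this block before any redefinition
      std::set<int> Kill;  // values defined in this block
    };

    // Worklist-driven backwards liveness:
    //   LiveOut(B) = union of LiveIn(S) over successors S
    //   LiveIn(B)  = Gen(B) union (LiveOut(B) minus Kill(B))
    static void runLiveness(const std::vector<Block *> &Blocks,
                            std::map<Block *, std::set<int>> &LiveIn,
                            std::map<Block *, std::set<int>> &LiveOut) {
      std::vector<Block *> Worklist(Blocks.begin(), Blocks.end());
      while (!Worklist.empty()) {
        Block *B = Worklist.back();
        Worklist.pop_back();

        std::set<int> Out;
        for (Block *S : B->Succs)
          Out.insert(LiveIn[S].begin(), LiveIn[S].end());
        LiveOut[B] = Out;

        std::set<int> In = Out;
        for (int V : B->Kill)
          In.erase(V);
        In.insert(B->Gen.begin(), B->Gen.end());

        // The sets only ever grow, so this converges; when LiveIn changes,
        // every predecessor has to be revisited.
        if (In != LiveIn[B]) {
          LiveIn[B] = std::move(In);
          Worklist.insert(Worklist.end(), B->Preds.begin(), B->Preds.end());
        }
      }
    }
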
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 875a007..bc068f7 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1012,7 +1012,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
Constant *Ptr = Operands[0];
auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
- markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices));
+ markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr,
+ Indices));
}
void SCCPSolver::visitStoreInst(StoreInst &SI) {
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 06b000f..59dc528 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1166,10 +1166,9 @@ public:
} else {
continue;
}
- Instruction *DbgVal =
- DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
- DIExpression(DVI->getExpression()), Inst);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
+ DIExpression(DVI->getExpression()),
+ DVI->getDebugLoc(), Inst);
}
}
};
@@ -1552,7 +1551,8 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx");
+ return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
+ NamePrefix + "sroa_idx");
}
/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1803,7 +1803,8 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
OffsetPtr = Int8PtrOffset == 0
? Int8Ptr
- : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
+ : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
+ IRB.getInt(Int8PtrOffset),
NamePrefix + "sroa_raw_idx");
}
Ptr = OffsetPtr;
@@ -3250,7 +3251,8 @@ private:
void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
- Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep");
+ Value *GEP =
+ IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
Value *Load = IRB.CreateLoad(GEP, Name + ".load");
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
DEBUG(dbgs() << " to: " << *Load << "\n");
@@ -3283,7 +3285,7 @@ private:
// Extract the single value and store it using the indices.
Value *Store = IRB.CreateStore(
IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
- IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"));
+ IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"));
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
}
@@ -4188,14 +4190,14 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// Create a piece expression describing the new partition or reuse AI's
// expression if there is only one partition.
DIExpression PieceExpr = Expr;
- if (IsSplit || Expr.isBitPiece()) {
+ if (IsSplit || Expr->isBitPiece()) {
// If this alloca is already a scalar replacement of a larger aggregate,
// Piece.Offset describes the offset inside the scalar.
- uint64_t Offset = Expr.isBitPiece() ? Expr.getBitPieceOffset() : 0;
+ uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
uint64_t Start = Offset + Piece.Offset;
uint64_t Size = Piece.Size;
- if (Expr.isBitPiece()) {
- uint64_t AbsEnd = Expr.getBitPieceOffset() + Expr.getBitPieceSize();
+ if (Expr->isBitPiece()) {
+ uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize();
if (Start >= AbsEnd)
// No need to describe a SROAed padding.
continue;
@@ -4208,8 +4210,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca))
OldDDI->eraseFromParent();
- auto *NewDDI = DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, &AI);
- NewDDI->setDebugLoc(DbgDecl->getDebugLoc());
+ DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(),
+ &AI);
}
}
return Changed;
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index 3e7cf04..f99fe3f 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -217,16 +217,16 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
/// \returns The profiled weight of I.
unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
DebugLoc DLoc = Inst.getDebugLoc();
- if (DLoc.isUnknown())
+ if (!DLoc)
return 0;
unsigned Lineno = DLoc.getLine();
if (Lineno < HeaderLineno)
return 0;
- DILocation DIL(DLoc.getAsMDNode(*Ctx));
+ DILocation DIL = DLoc.get();
int LOffset = Lineno - HeaderLineno;
- unsigned Discriminator = DIL.getDiscriminator();
+ unsigned Discriminator = DIL->getDiscriminator();
unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
<< " (line offset: " << LOffset << "." << Discriminator
@@ -642,9 +642,8 @@ void SampleProfileLoader::propagateWeights(Function &F) {
/// \returns the line number where \p F is defined. If it returns 0,
/// it means that there is no debug information available for \p F.
unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
- DISubprogram S = getDISubprogram(&F);
- if (S.isSubprogram())
- return S.getLineNumber();
+ if (MDSubprogram *S = getDISubprogram(&F))
+ return S->getLine();
// If could not find the start of \p F, emit a diagnostic to inform the user
// about the missed opportunity.
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 6cc8411..42095ae 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -59,6 +59,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
initializeMergedLoadStoreMotionPass(Registry);
+ initializeNaryReassociatePass(Registry);
initializePartiallyInlineLibCallsPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
@@ -77,6 +78,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoadCombinePass(Registry);
initializePlaceBackedgeSafepointsImplPass(Registry);
initializePlaceSafepointsPass(Registry);
+ initializeFloat2IntPass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index acd8585..693c5ae 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1117,10 +1117,9 @@ public:
} else {
continue;
}
- Instruction *DbgVal = DIB->insertDbgValueIntrinsic(
- Arg, 0, DIVariable(DVI->getVariable()),
- DIExpression(DVI->getExpression()), Inst);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ DIB->insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
+ DIExpression(DVI->getExpression()),
+ DVI->getDebugLoc(), Inst);
}
}
};
@@ -2135,7 +2134,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
// split the alloca again later.
unsigned AS = AI->getType()->getAddressSpace();
Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS));
- V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
+ V = Builder.CreateGEP(Builder.getInt8Ty(), V, Builder.getInt64(NewOffset));
IdxTy = NewElts[Idx]->getAllocatedType();
uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset;
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index a457cba..d55dc6a 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -213,7 +213,7 @@ Value *Scatterer::operator[](unsigned I) {
CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
}
if (I != 0)
- CV[I] = Builder.CreateConstGEP1_32(CV[0], I,
+ CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
V->getName() + ".i" + Twine(I));
} else {
// Search through a chain of InsertElementInsts looking for element I.
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 1a04d74..8af4753 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -757,14 +757,16 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
}
}
// Create an ugly GEP with a single index for each index.
- ResultPtr = Builder.CreateGEP(ResultPtr, Idx, "uglygep");
+ ResultPtr =
+ Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
}
}
// Create a GEP with the constant offset index.
if (AccumulativeByteOffset != 0) {
Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
- ResultPtr = Builder.CreateGEP(ResultPtr, Offset, "uglygep");
+ ResultPtr =
+ Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
}
if (ResultPtr->getType() != Variadic->getType())
ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
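
The CreateGEP calls changed above are the ones that rebuild a split GEP as raw byte arithmetic: each variable index gets its own i8 "uglygep", and all constant parts collapse into AccumulativeByteOffset. On a hypothetical int A[10][20], the net effect of lowering &A[i][j + 5] this way is roughly the following (illustrative only; the pass emits i8 GEPs and a final bitcast at the IR level):

    int *loweredIndex(int (*A)[20], long i, long j) {
      char *P = reinterpret_cast<char *>(A);
      P += i * 80;   // variable index #1: 20 columns * sizeof(int)
      P += j * 4;    // variable index #2: sizeof(int)
      P += 20;       // AccumulativeByteOffset: the constant 5 * sizeof(int)
      return reinterpret_cast<int *>(P);
    }
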
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index e71031c..2fc9368 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -15,42 +15,46 @@
//
// There are many optimizations we can perform in the domain of SLSR. This file
// for now contains only an initial step. Specifically, we look for strength
-// reduction candidates in two forms:
+// reduction candidates in the following forms:
//
-// Form 1: (B + i) * S
-// Form 2: &B[i * S]
+// Form 1: B + i * S
+// Form 2: (B + i) * S
+// Form 3: &B[i * S]
//
// where S is an integer variable, and i is a constant integer. If we found two
-// candidates
+// candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2
+// in a simpler way with respect to S1. For example,
//
-// S1: X = (B + i) * S
-// S2: Y = (B + i') * S
+// S1: X = B + i * S
+// S2: Y = B + i' * S => X + (i' - i) * S
//
-// or
+// S1: X = (B + i) * S
+// S2: Y = (B + i') * S => X + (i' - i) * S
//
// S1: X = &B[i * S]
-// S2: Y = &B[i' * S]
-//
-// and S1 dominates S2, we call S1 a basis of S2, and can replace S2 with
+// S2: Y = &B[i' * S] => &X[(i' - i) * S]
//
-// Y = X + (i' - i) * S
+// Note: (i' - i) * S is folded to the extent possible.
//
-// or
+// This rewriting is in general a good idea. The code patterns we focus on
+// usually come from loop unrolling, so (i' - i) * S is likely the same
+// across iterations and can be reused. When that happens, the optimized form
+// takes only one add starting from the second iteration.
//
-// Y = &X[(i' - i) * S]
-//
-// where (i' - i) * S is folded to the extent possible. When S2 has multiple
-// bases, we pick the one that is closest to S2, or S2's "immediate" basis.
+// When such rewriting is possible, we call S1 a "basis" of S2. When S2 has
+// multiple bases, we choose to rewrite S2 with respect to its "immediate"
+// basis, the basis that is the closest ancestor in the dominator tree.
//
// TODO:
//
-// - Handle candidates in the form of B + i * S
-//
// - Floating point arithmetics when fast math is enabled.
//
// - SLSR may decrease ILP at the architecture level. Targets that are very
// sensitive to ILP may want to disable it. Having SLSR to consider ILP is
// left as future work.
+//
+// - When (i' - i) is constant but i and i' are not, we could still perform
+// SLSR.
#include <vector>
#include "llvm/ADT/DenseSet.h"
@@ -72,13 +76,12 @@ namespace {
class StraightLineStrengthReduce : public FunctionPass {
public:
- // SLSR candidate. Such a candidate must be in the form of
- // (Base + Index) * Stride
- // or
- // Base[..][Index * Stride][..]
+ // SLSR candidate. Such a candidate must be in one of the forms described in
+ // the header comments.
struct Candidate : public ilist_node<Candidate> {
enum Kind {
Invalid, // reserved for the default constructor
+ Add, // B + i * S
Mul, // (B + i) * S
GEP, // &B[..][i * S][..]
};
@@ -92,14 +95,14 @@ public:
Basis(nullptr) {}
Kind CandidateKind;
const SCEV *Base;
- // Note that Index and Stride of a GEP candidate may not have the same
- // integer type. In that case, during rewriting, Stride will be
+ // Note that Index and Stride of a GEP candidate do not necessarily have the
+ // same integer type. In that case, during rewriting, Stride will be
// sign-extended or truncated to Index's type.
ConstantInt *Index;
Value *Stride;
// The instruction this candidate corresponds to. It helps us to rewrite a
// candidate with respect to its immediate basis. Note that one instruction
- // can corresponds to multiple candidates depending on how you associate the
+ // can correspond to multiple candidates depending on how you associate the
// expression. For instance,
//
// (a + 1) * (b + 2)
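
The example the comment above is heading into is the two ways a single multiply can be split into Base + Index and Stride; both splits are recorded as candidates, which is why the operands are later tried in both orders. A purely illustrative rendering (CandidateSketch is not the pass's Candidate type):

    struct CandidateSketch {
      const char *Base, *Index, *Stride;
    };

    // (a + 1) * (b + 2) as two Mul candidates:
    static const CandidateSketch MulSplits[] = {
        {"a", "1", "b + 2"},   // LHS matched as B + i, RHS used as the stride
        {"b", "2", "a + 1"},   // RHS matched as B + i, LHS used as the stride
    };
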
@@ -143,31 +146,43 @@ private:
// Returns true if Basis is a basis for C, i.e., Basis dominates C and they
// share the same base and stride.
bool isBasisFor(const Candidate &Basis, const Candidate &C);
+ // Returns whether the candidate can be folded into an addressing mode.
+ bool isFoldable(const Candidate &C, TargetTransformInfo *TTI,
+ const DataLayout *DL);
+ // Returns true if C is already in its simplest form and not worth being
+ // rewritten.
+ bool isSimplestForm(const Candidate &C);
// Checks whether I is in a candidate form. If so, adds all the matching forms
// to Candidates, and tries to find the immediate basis for each of them.
- void allocateCandidateAndFindBasis(Instruction *I);
+ void allocateCandidatesAndFindBasis(Instruction *I);
+ // Allocate candidates and find bases for Add instructions.
+ void allocateCandidatesAndFindBasisForAdd(Instruction *I);
+ // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a
+ // candidate.
+ void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS,
+ Instruction *I);
// Allocate candidates and find bases for Mul instructions.
- void allocateCandidateAndFindBasisForMul(Instruction *I);
+ void allocateCandidatesAndFindBasisForMul(Instruction *I);
// Splits LHS into Base + Index and, if succeeds, calls
- // allocateCandidateAndFindBasis.
- void allocateCandidateAndFindBasisForMul(Value *LHS, Value *RHS,
- Instruction *I);
+ // allocateCandidatesAndFindBasis.
+ void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS,
+ Instruction *I);
// Allocate candidates and find bases for GetElementPtr instructions.
- void allocateCandidateAndFindBasisForGEP(GetElementPtrInst *GEP);
+ void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP);
// A helper function that scales Idx with ElementSize before invoking
- // allocateCandidateAndFindBasis.
- void allocateCandidateAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx,
- Value *S, uint64_t ElementSize,
- Instruction *I);
+ // allocateCandidatesAndFindBasis.
+ void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx,
+ Value *S, uint64_t ElementSize,
+ Instruction *I);
// Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate
// basis.
- void allocateCandidateAndFindBasis(Candidate::Kind CT, const SCEV *B,
- ConstantInt *Idx, Value *S,
- Instruction *I);
+ void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B,
+ ConstantInt *Idx, Value *S,
+ Instruction *I);
// Rewrites candidate C with respect to Basis.
void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis);
// A helper function that factors ArrayIdx to a product of a stride and a
- // constant index, and invokes allocateCandidateAndFindBasis with the
+ // constant index, and invokes allocateCandidatesAndFindBasis with the
// factorings.
void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
GetElementPtrInst *GEP);
@@ -187,7 +202,7 @@ private:
// Temporarily holds all instructions that are unlinked (but not deleted) by
// rewriteCandidateWithBasis. These instructions will be actually removed
// after all rewriting finishes.
- DenseSet<Instruction *> UnlinkedInstructions;
+ std::vector<Instruction *> UnlinkedInstructions;
};
} // anonymous namespace
@@ -215,9 +230,9 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
Basis.CandidateKind == C.CandidateKind);
}
-static bool isCompletelyFoldable(GetElementPtrInst *GEP,
- const TargetTransformInfo *TTI,
- const DataLayout *DL) {
+static bool isGEPFoldable(GetElementPtrInst *GEP,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL) {
GlobalVariable *BaseGV = nullptr;
int64_t BaseOffset = 0;
bool HasBaseReg = false;
@@ -252,53 +267,143 @@ static bool isCompletelyFoldable(GetElementPtrInst *GEP,
BaseOffset, HasBaseReg, Scale);
}
-// TODO: We currently implement an algorithm whose time complexity is linear to
-// the number of existing candidates. However, a better algorithm exists. We
-// could depth-first search the dominator tree, and maintain a hash table that
-// contains all candidates that dominate the node being traversed. This hash
-// table is indexed by the base and the stride of a candidate. Therefore,
-// finding the immediate basis of a candidate boils down to one hash-table look
-// up.
-void StraightLineStrengthReduce::allocateCandidateAndFindBasis(
- Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
- Instruction *I) {
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
- // If &B[Idx * S] fits into an addressing mode, do not turn it into
- // non-free computation.
- if (isCompletelyFoldable(GEP, TTI, DL))
- return;
+// Returns whether (Base + Index * Stride) can be folded to an addressing mode.
+static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride,
+ TargetTransformInfo *TTI) {
+ return TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true,
+ Index->getSExtValue());
+}
+
+bool StraightLineStrengthReduce::isFoldable(const Candidate &C,
+ TargetTransformInfo *TTI,
+ const DataLayout *DL) {
+ if (C.CandidateKind == Candidate::Add)
+ return isAddFoldable(C.Base, C.Index, C.Stride, TTI);
+ if (C.CandidateKind == Candidate::GEP)
+ return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI, DL);
+ return false;
+}
+
+// Returns true if GEP has zero or one non-zero index.
+static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP) {
+ unsigned NumNonZeroIndices = 0;
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) {
+ ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
+ if (ConstIdx == nullptr || !ConstIdx->isZero())
+ ++NumNonZeroIndices;
+ }
+ return NumNonZeroIndices <= 1;
+}
+
+bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) {
+ if (C.CandidateKind == Candidate::Add) {
+ // B + 1 * S or B + (-1) * S
+ return C.Index->isOne() || C.Index->isMinusOne();
+ }
+ if (C.CandidateKind == Candidate::Mul) {
+ // (B + 0) * S
+ return C.Index->isZero();
+ }
+ if (C.CandidateKind == Candidate::GEP) {
+ // (char*)B + S or (char*)B - S
+ return ((C.Index->isOne() || C.Index->isMinusOne()) &&
+ hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins)));
}
+ return false;
+}
+// TODO: We currently implement an algorithm whose time complexity is linear in
+// the number of existing candidates. However, we could do better by using
+// ScopedHashTable. Specifically, while traversing the dominator tree, we could
+// maintain all the candidates that dominate the basic block being traversed in
+// a ScopedHashTable. This hash table is indexed by the base and the stride of
+// a candidate. Therefore, finding the immediate basis of a candidate boils down
+// to one hash-table look up.
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
+ Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
+ Instruction *I) {
Candidate C(CT, B, Idx, S, I);
- // Try to compute the immediate basis of C.
- unsigned NumIterations = 0;
- // Limit the scan radius to avoid running forever.
- static const unsigned MaxNumIterations = 50;
- for (auto Basis = Candidates.rbegin();
- Basis != Candidates.rend() && NumIterations < MaxNumIterations;
- ++Basis, ++NumIterations) {
- if (isBasisFor(*Basis, C)) {
- C.Basis = &(*Basis);
- break;
+ // SLSR can complicate an instruction in two cases:
+ //
+ // 1. If we can fold I into an addressing mode, computing I is likely free or
+ // takes only one instruction.
+ //
+ // 2. I is already in its simplest form. For example, when
+ // X = B + 8 * S
+ // Y = B + S,
+ // rewriting Y to X - 7 * S is probably a bad idea.
+ //
+ // In the above cases, we still add I to the candidate list so that I can be
+ // the basis of other candidates, but we leave I's basis blank so that I
+ // won't be rewritten.
+ if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) {
+ // Try to compute the immediate basis of C.
+ unsigned NumIterations = 0;
+ // Limit the scan radius to avoid running in quadratic time.
+ static const unsigned MaxNumIterations = 50;
+ for (auto Basis = Candidates.rbegin();
+ Basis != Candidates.rend() && NumIterations < MaxNumIterations;
+ ++Basis, ++NumIterations) {
+ if (isBasisFor(*Basis, C)) {
+ C.Basis = &(*Basis);
+ break;
+ }
}
}
// Regardless of whether we find a basis for C, we need to push C to the
- // candidate list.
+ // candidate list so that it can be the basis of other candidates.
Candidates.push_back(C);
}
-void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Instruction *I) {
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
+ Instruction *I) {
switch (I->getOpcode()) {
+ case Instruction::Add:
+ allocateCandidatesAndFindBasisForAdd(I);
+ break;
case Instruction::Mul:
- allocateCandidateAndFindBasisForMul(I);
+ allocateCandidatesAndFindBasisForMul(I);
break;
case Instruction::GetElementPtr:
- allocateCandidateAndFindBasisForGEP(cast<GetElementPtrInst>(I));
+ allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I));
break;
}
}
-void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul(
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
+ Instruction *I) {
+ // Try matching B + i * S.
+ if (!isa<IntegerType>(I->getType()))
+ return;
+
+ assert(I->getNumOperands() == 2 && "isn't I an add?");
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ allocateCandidatesAndFindBasisForAdd(LHS, RHS, I);
+ if (LHS != RHS)
+ allocateCandidatesAndFindBasisForAdd(RHS, LHS, I);
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
+ Value *LHS, Value *RHS, Instruction *I) {
+ Value *S = nullptr;
+ ConstantInt *Idx = nullptr;
+ if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) {
+ // I = LHS + RHS = LHS + Idx * S
+ allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
+ } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) {
+ // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx)
+ APInt One(Idx->getBitWidth(), 1);
+ Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue());
+ allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
+ } else {
+ // At least, I = LHS + 1 * RHS
+ ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1);
+ allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS,
+ I);
+ }
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
Value *LHS, Value *RHS, Instruction *I) {
Value *B = nullptr;
ConstantInt *Idx = nullptr;
@@ -306,54 +411,54 @@ void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul(
if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) {
// If LHS is in the form of "Base + Index", then I is in the form of
// "(Base + Index) * RHS".
- allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
+ allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
} else {
// Otherwise, at least try the form (LHS + 0) * RHS.
ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0);
- allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,
+ allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,
I);
}
}
-void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul(
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
Instruction *I) {
// Try matching (B + i) * S.
// TODO: we could extend SLSR to float and vector types.
if (!isa<IntegerType>(I->getType()))
return;
+ assert(I->getNumOperands() == 2 && "isn't I a mul?");
Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
- allocateCandidateAndFindBasisForMul(LHS, RHS, I);
+ allocateCandidatesAndFindBasisForMul(LHS, RHS, I);
if (LHS != RHS) {
// Symmetrically, try to split RHS to Base + Index.
- allocateCandidateAndFindBasisForMul(RHS, LHS, I);
+ allocateCandidatesAndFindBasisForMul(RHS, LHS, I);
}
}
-void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP(
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize,
Instruction *I) {
- // I = B + sext(Idx *nsw S) *nsw ElementSize
+ // I = B + sext(Idx *nsw S) * ElementSize
+ // = B + (sext(Idx) * sext(S)) * ElementSize
// = B + (sext(Idx) * ElementSize) * sext(S)
// Casting to IntegerType is safe because we skipped vector GEPs.
IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));
ConstantInt *ScaledIdx = ConstantInt::get(
IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
- allocateCandidateAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
+ allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
}
void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
const SCEV *Base,
uint64_t ElementSize,
GetElementPtrInst *GEP) {
- // At least, ArrayIdx = ArrayIdx *s 1.
- allocateCandidateAndFindBasisForGEP(
+ // At least, ArrayIdx = ArrayIdx *nsw 1.
+ allocateCandidatesAndFindBasisForGEP(
Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),
ArrayIdx, ElementSize, GEP);
Value *LHS = nullptr;
ConstantInt *RHS = nullptr;
- // TODO: handle shl. e.g., we could treat (S << 2) as (S * 4).
- //
// One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx
// itself. This would allow us to handle the shl case for free. However,
// matching SCEVs has two issues:
@@ -367,12 +472,19 @@ void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
// sext'ed multiplication.
if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
// SLSR is currently unsafe if i * S may overflow.
- // GEP = Base + sext(LHS *nsw RHS) *nsw ElementSize
- allocateCandidateAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
+ // GEP = Base + sext(LHS *nsw RHS) * ElementSize
+ allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
+ } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) {
+ // GEP = Base + sext(LHS <<nsw RHS) * ElementSize
+ // = Base + sext(LHS *nsw (1 << RHS)) * ElementSize
+ APInt One(RHS->getBitWidth(), 1);
+ ConstantInt *PowerOf2 =
+ ConstantInt::get(RHS->getContext(), One << RHS->getValue());
+ allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP);
}
}
-void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP(
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
GetElementPtrInst *GEP) {
// TODO: handle vector GEPs
if (GEP->getType()->isVectorTy())
@@ -436,6 +548,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
else
BumpWithUglyGEP = true;
}
+
// Compute Bump = C - Basis = (i' - i) * S.
// Common case 1: if (i' - i) is 1, Bump = S.
if (IndexOffset.getSExtValue() == 1)
@@ -443,9 +556,24 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
// Common case 2: if (i' - i) is -1, Bump = -S.
if (IndexOffset.getSExtValue() == -1)
return Builder.CreateNeg(C.Stride);
- // Otherwise, Bump = (i' - i) * sext/trunc(S).
- ConstantInt *Delta = ConstantInt::get(Basis.Ins->getContext(), IndexOffset);
- Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, Delta->getType());
+
+ // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
+ // have different bit widths.
+ IntegerType *DeltaType =
+ IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth());
+ Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType);
+ if (IndexOffset.isPowerOf2()) {
+ // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i).
+ ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
+ return Builder.CreateShl(ExtendedStride, Exponent);
+ }
+ if ((-IndexOffset).isPowerOf2()) {
+ // If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i - i').
+ ConstantInt *Exponent =
+ ConstantInt::get(DeltaType, (-IndexOffset).logBase2());
+ return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent));
+ }
+ Constant *Delta = ConstantInt::get(DeltaType, IndexOffset);
return Builder.CreateMul(ExtendedStride, Delta);
}
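
The new power-of-two cases in emitBump replace the generic multiply by a shift (or a negated shift). A scalar sketch of the same case split, with IndexOffset standing for i' - i and S for the already sext/trunc'ed stride (this computes values rather than emitting IR, and assumes IndexOffset is non-zero and not INT64_MIN):

    #include <cstdint>

    static int64_t bumpValue(int64_t IndexOffset, int64_t S) {
      if (IndexOffset == 1)
        return S;                       // Bump = S
      if (IndexOffset == -1)
        return -S;                      // Bump = -S
      uint64_t Abs = IndexOffset > 0 ? IndexOffset : -IndexOffset;
      if ((Abs & (Abs - 1)) == 0) {     // |i' - i| is a power of two
        int Log = 0;
        while ((uint64_t(1) << Log) < Abs)
          ++Log;
        int64_t Shifted = S << Log;
        return IndexOffset > 0 ? Shifted : -Shifted;
      }
      return IndexOffset * S;           // general case: one multiply
    }
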
@@ -453,6 +581,9 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
const Candidate &C, const Candidate &Basis) {
assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base &&
C.Stride == Basis.Stride);
+ // We run rewriteCandidateWithBasis on all candidates in a post-order, so the
+ // basis of a candidate cannot be unlinked before the candidate.
+ assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked");
// An instruction can correspond to multiple candidates. Therefore, instead of
// simply deleting an instruction when we rewrite it, we mark its parent as
@@ -466,25 +597,38 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);
Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
switch (C.CandidateKind) {
+ case Candidate::Add:
case Candidate::Mul:
- Reduced = Builder.CreateAdd(Basis.Ins, Bump);
+ if (BinaryOperator::isNeg(Bump)) {
+ Reduced =
+ Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump));
+ } else {
+ Reduced = Builder.CreateAdd(Basis.Ins, Bump);
+ }
break;
case Candidate::GEP:
{
Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
+ bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
if (BumpWithUglyGEP) {
// C = (char *)Basis + Bump
unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
- // We only considered inbounds GEP as candidates.
- Reduced = Builder.CreateInBoundsGEP(Reduced, Bump);
+ if (InBounds)
+ Reduced =
+ Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump);
+ else
+ Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump);
Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
} else {
// C = gep Basis, Bump
// Canonicalize bump to pointer size.
Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
- Reduced = Builder.CreateInBoundsGEP(Basis.Ins, Bump);
+ if (InBounds)
+ Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump);
+ else
+ Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump);
}
}
break;
@@ -497,7 +641,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
// Unlink C.Ins so that we can skip other candidates also corresponding to
// C.Ins. The actual deletion is postponed to the end of runOnFunction.
C.Ins->removeFromParent();
- UnlinkedInstructions.insert(C.Ins);
+ UnlinkedInstructions.push_back(C.Ins);
}
bool StraightLineStrengthReduce::runOnFunction(Function &F) {
@@ -512,7 +656,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT);
node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) {
for (auto &I : *node->getBlock())
- allocateCandidateAndFindBasis(&I);
+ allocateCandidatesAndFindBasis(&I);
}
// Rewrite candidates in the reverse depth-first order. This order makes sure
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 6c3ce58..4f23e20 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -887,7 +887,7 @@ void StructurizeCFG::createFlow() {
/// no longer dominate all their uses. Not sure if this is really nessasary
void StructurizeCFG::rebuildSSA() {
SSAUpdater Updater;
- for (const auto &BB : ParentRegion->blocks())
+ for (auto *BB : ParentRegion->blocks())
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 820544b..c1cd39a 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -174,42 +174,51 @@ bool AddDiscriminators::runOnFunction(Function &F) {
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
BasicBlock *B = I;
TerminatorInst *Last = B->getTerminator();
- DebugLoc LastLoc = Last->getDebugLoc();
- if (LastLoc.isUnknown()) continue;
- DILocation LastDIL(LastLoc.getAsMDNode(Ctx));
+ DILocation LastDIL = Last->getDebugLoc().get();
+ if (!LastDIL)
+ continue;
for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) {
BasicBlock *Succ = Last->getSuccessor(I);
Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime();
- DebugLoc FirstLoc = First->getDebugLoc();
- if (FirstLoc.isUnknown()) continue;
- DILocation FirstDIL(FirstLoc.getAsMDNode(Ctx));
+ DILocation FirstDIL = First->getDebugLoc().get();
+ if (!FirstDIL)
+ continue;
// If the first instruction (First) of Succ is at the same file
// location as B's last instruction (Last), add a new
// discriminator for First's location and all the instructions
// in Succ that share the same location with First.
- if (FirstDIL.atSameLineAs(LastDIL)) {
+ if (!FirstDIL->canDiscriminate(*LastDIL)) {
// Create a new lexical scope and compute a new discriminator
// number for it.
- StringRef Filename = FirstDIL.getFilename();
- DIScope Scope = FirstDIL.getScope();
- DIFile File = Builder.createFile(Filename, Scope.getDirectory());
- unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx);
+ StringRef Filename = FirstDIL->getFilename();
+ auto *Scope = FirstDIL->getScope();
+ DIFile File = Builder.createFile(Filename, Scope->getDirectory());
+
+ // FIXME: Calculate the discriminator here, based on local information,
+ // and delete MDLocation::computeNewDiscriminator(). The current
+ // solution gives different results depending on other modules in the
+ // same context. All we really need is to discriminate between
+ // FirstDIL and LastDIL -- a local map would suffice.
+ unsigned Discriminator = FirstDIL->computeNewDiscriminator();
DILexicalBlockFile NewScope =
Builder.createLexicalBlockFile(Scope, File, Discriminator);
- DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope);
- DebugLoc newDebugLoc = DebugLoc::getFromDILocation(NewDIL);
+ auto *NewDIL =
+ MDLocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(),
+ NewScope, FirstDIL->getInlinedAt());
+ DebugLoc newDebugLoc = NewDIL;
// Attach this new debug location to First and every
// instruction following First that shares the same location.
for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1;
++I1) {
- if (I1->getDebugLoc() != FirstLoc) break;
+ if (I1->getDebugLoc().get() != FirstDIL)
+ break;
I1->setDebugLoc(newDebugLoc);
- DEBUG(dbgs() << NewDIL.getFilename() << ":" << NewDIL.getLineNumber()
- << ":" << NewDIL.getColumnNumber() << ":"
- << NewDIL.getDiscriminator() << *I1 << "\n");
+ DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine()
+ << ":" << NewDIL->getColumn() << ":"
+ << NewDIL->getDiscriminator() << *I1 << "\n");
}
DEBUG(dbgs() << "\n");
Changed = true;
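
The rewrite above targets source constructs where two basic blocks end up sharing a file/line location, which is exactly what DWARF discriminators disambiguate. An illustrative C-level case (not taken from the tests): without a discriminator, samples for the call and for the early return below would be attributed to the same line and become indistinguishable to SampleProfile.

    extern int bar(void);

    int foo(int x) {
      if (x < 0) return 0; else return bar();   // two blocks, one source line
    }
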
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index f04ea9c..f200b58 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -157,20 +157,21 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Find the MDNode which corresponds to the DISubprogram data that described F.
static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) {
for (DISubprogram Subprogram : Finder.subprograms()) {
- if (Subprogram.describes(F)) return Subprogram;
+ if (Subprogram->describes(F))
+ return Subprogram;
}
return nullptr;
}
// Add an operand to an existing MDNode. The new operand will be added at the
// back of the operand list.
-static void AddOperand(DICompileUnit CU, DIArray SPs, Metadata *NewSP) {
+static void AddOperand(DICompileUnit CU, MDSubprogramArray SPs, Metadata *NewSP) {
SmallVector<Metadata *, 16> NewSPs;
- NewSPs.reserve(SPs->getNumOperands() + 1);
- for (unsigned I = 0, E = SPs->getNumOperands(); I != E; ++I)
- NewSPs.push_back(SPs->getOperand(I));
+ NewSPs.reserve(SPs.size() + 1);
+ for (auto *SP : SPs)
+ NewSPs.push_back(SP);
NewSPs.push_back(NewSP);
- CU.replaceSubprograms(DIArray(MDNode::get(CU->getContext(), NewSPs)));
+ CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs));
}
// Clone the module-level debug info associated with OldFunc. The cloned data
@@ -186,15 +187,15 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
// Ensure that OldFunc appears in the map.
// (if it's already there it must point to NewFunc anyway)
VMap[OldFunc] = NewFunc;
- DISubprogram NewSubprogram(MapMetadata(OldSubprogramMDNode, VMap));
+ DISubprogram NewSubprogram =
+ cast<MDSubprogram>(MapMetadata(OldSubprogramMDNode, VMap));
for (DICompileUnit CU : Finder.compile_units()) {
- DIArray Subprograms(CU.getSubprograms());
-
+ auto Subprograms = CU->getSubprograms();
// If the compile unit's function list contains the old function, it should
// also contain the new one.
- for (unsigned i = 0; i < Subprograms.getNumElements(); i++) {
- if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) {
+ for (auto *SP : Subprograms) {
+ if (SP == OldSubprogramMDNode) {
AddOperand(CU, Subprograms, NewSubprogram);
break;
}
@@ -395,7 +396,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (Action == CloningDirector::CloneSuccessors) {
// If the director says to skip with a terminate instruction, we still
// need to clone this block's successors.
- const TerminatorInst *TI = BB->getTerminator();
+ const TerminatorInst *TI = NewBB->getTerminator();
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
ToClone.push_back(TI->getSuccessor(i));
return;
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index 52e2d59..44b7d25 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -150,7 +150,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
if (MSI->isVolatile())
return true;
GS.StoredType = GlobalStatus::Stored;
- } else if (ImmutableCallSite C = I) {
+ } else if (auto C = ImmutableCallSite(I)) {
if (!C.isCallee(&U))
return true;
GS.IsLoaded = true;
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index df3e1d4..a08ffbe 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -835,11 +835,10 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode,
DenseMap<const MDLocation *, MDLocation *> &IANodes) {
SmallVector<MDLocation*, 3> InlinedAtLocations;
MDLocation *Last = InlinedAtNode;
- DebugLoc CurInlinedAt = DL;
+ MDLocation *CurInlinedAt = DL;
// Gather all the inlined-at nodes
- while (MDLocation *IA =
- cast_or_null<MDLocation>(CurInlinedAt.getInlinedAt(Ctx))) {
+ while (MDLocation *IA = CurInlinedAt->getInlinedAt()) {
// Skip any we've already built nodes for
if (MDLocation *Found = IANodes[IA]) {
Last = Found;
@@ -847,7 +846,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode,
}
InlinedAtLocations.push_back(IA);
- CurInlinedAt = DebugLoc::getFromDILocation(IA);
+ CurInlinedAt = IA;
}
// Starting from the top, rebuild the nodes to point to the new inlined-at
@@ -862,7 +861,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode,
// And finally create the normal location for this instruction, referring to
// the new inlined-at chain.
- return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), Last);
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last);
}
/// Update inlined instructions' line numbers to
@@ -870,11 +869,11 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode,
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
Instruction *TheCall) {
DebugLoc TheCallDL = TheCall->getDebugLoc();
- if (TheCallDL.isUnknown())
+ if (!TheCallDL)
return;
auto &Ctx = Fn->getContext();
- auto *InlinedAtNode = cast<MDLocation>(TheCallDL.getAsMDNode(Ctx));
+ MDLocation *InlinedAtNode = TheCallDL;
// Create a unique call site, not to be confused with any other call from the
// same location.
@@ -891,7 +890,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
DebugLoc DL = BI->getDebugLoc();
- if (DL.isUnknown()) {
+ if (!DL) {
// If the inlined instruction has no line number, make it look as if it
// originates from the call location. This is important for
// ((__always_inline__, __nodebug__)) functions which must use caller
@@ -905,19 +904,6 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
BI->setDebugLoc(TheCallDL);
} else {
BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes));
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
- LLVMContext &Ctx = BI->getContext();
- MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
- DVI->setOperand(2, MetadataAsValue::get(
- Ctx, createInlinedVariable(DVI->getVariable(),
- InlinedAt, Ctx)));
- } else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
- LLVMContext &Ctx = BI->getContext();
- MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
- DDI->setOperand(1, MetadataAsValue::get(
- Ctx, createInlinedVariable(DDI->getVariable(),
- InlinedAt, Ctx)));
- }
}
}
}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index bd15f9e..1c9760e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -998,17 +998,14 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
/// that has an associated llvm.dbg.decl intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI, DIBuilder &Builder) {
- DIVariable DIVar(DDI->getVariable());
- DIExpression DIExpr(DDI->getExpression());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ DIVariable DIVar = DDI->getVariable();
+ DIExpression DIExpr = DDI->getExpression();
if (!DIVar)
return false;
if (LdStHasDebugValue(DIVar, SI))
return true;
- Instruction *DbgVal = nullptr;
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
Argument *ExtendedArg = nullptr;
@@ -1017,11 +1014,11 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
if (ExtendedArg)
- DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, SI);
+ Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr,
+ DDI->getDebugLoc(), SI);
else
- DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar,
- DIExpr, SI);
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
+ DDI->getDebugLoc(), SI);
return true;
}
@@ -1029,19 +1026,16 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
/// that has an associated llvm.dbg.decl intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
LoadInst *LI, DIBuilder &Builder) {
- DIVariable DIVar(DDI->getVariable());
- DIExpression DIExpr(DDI->getExpression());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ DIVariable DIVar = DDI->getVariable();
+ DIExpression DIExpr = DDI->getExpression();
if (!DIVar)
return false;
if (LdStHasDebugValue(DIVar, LI))
return true;
- Instruction *DbgVal =
- Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, LI);
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr,
+ DDI->getDebugLoc(), LI);
return true;
}
@@ -1083,10 +1077,9 @@ bool llvm::LowerDbgDeclare(Function &F) {
// This is a call by-value or some other instruction that
// takes a pointer to the variable. Insert a *value*
// intrinsic that describes the alloca.
- auto DbgVal = DIB.insertDbgValueIntrinsic(
- AI, 0, DIVariable(DDI->getVariable()),
- DIExpression(DDI->getExpression()), CI);
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ DIB.insertDbgValueIntrinsic(AI, 0, DIVariable(DDI->getVariable()),
+ DIExpression(DDI->getExpression()),
+ DDI->getDebugLoc(), CI);
}
DDI->eraseFromParent();
}
@@ -1112,10 +1105,8 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
if (!DDI)
return false;
DebugLoc Loc = DDI->getDebugLoc();
- DIVariable DIVar(DDI->getVariable());
- DIExpression DIExpr(DDI->getExpression());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ DIVariable DIVar = DDI->getVariable();
+ DIExpression DIExpr = DDI->getExpression();
if (!DIVar)
return false;
@@ -1127,16 +1118,14 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
SmallVector<uint64_t, 4> NewDIExpr;
NewDIExpr.push_back(dwarf::DW_OP_deref);
if (DIExpr)
- for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i)
- NewDIExpr.push_back(DIExpr.getElement(i));
+ NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
DIExpr = Builder.createExpression(NewDIExpr);
}
// Insert llvm.dbg.declare in the same basic block as the original alloca,
// and remove old llvm.dbg.declare.
BasicBlock *BB = AI->getParent();
- Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, BB)
- ->setDebugLoc(Loc);
+ Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB);
DDI->eraseFromParent();
return true;
}
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 6b3aa02..1dbce47 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -146,6 +146,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
+/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
+/// have a runtime (i.e. not compile time constant) trip count. Unrolling these
+/// loops requires an unroll "prologue" that runs "RuntimeTripCount % Count"
+/// iterations before branching into the unrolled loop. UnrollLoop will not
+/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
+/// AllowExpensiveTripCount is false.
+///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
@@ -154,8 +161,9 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available from the Pass it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
- bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI,
- Pass *PP, LPPassManager *LPM, AssumptionCache *AC) {
+ bool AllowRuntime, bool AllowExpensiveTripCount,
+ unsigned TripMultiple, LoopInfo *LI, Pass *PP,
+ LPPassManager *LPM, AssumptionCache *AC) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -218,7 +226,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// flag is specified.
bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
- if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+ if (RuntimeTripCount &&
+ !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM))
return false;
// Notify ScalarEvolution that the loop will be substantially changed,
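
UnrollLoop grows a new AllowExpensiveTripCount parameter in this hunk. A minimal caller sketch with the flag spelled out; the unroll factor and the surrounding variables (L, TripCount, TripMultiple, LI, P, LPM, AC) stand in for whatever the calling pass already holds and are not taken from this patch:

    // Runtime unrolling is allowed, but bail out instead of emitting an
    // expensive trip-count computation for the prologue.
    bool Changed = UnrollLoop(L, /*Count=*/4, TripCount,
                              /*AllowRuntime=*/true,
                              /*AllowExpensiveTripCount=*/false,
                              TripMultiple, LI, P, LPM, AC);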
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 381d8fc..d1774df 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -278,7 +278,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
/// ...
/// End:
///
-bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
+ bool AllowExpensiveTripCount, LoopInfo *LI,
LPPassManager *LPM) {
// for now, only unroll loops that contain a single exit
if (!L->getExitingBlock())
@@ -312,15 +313,20 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
+ BasicBlock *Header = L->getHeader();
+ const DataLayout &DL = Header->getModule()->getDataLayout();
+ SCEVExpander Expander(*SE, DL, "loop-unroll");
+ if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
+ return false;
+
// We only handle cases when the unroll factor is a power of 2.
// Count is the loop unroll factor, the number of extra copies added + 1.
if (!isPowerOf2_32(Count))
return false;
// This constraint lets us deal with an overflowing trip count easily; see the
- // comment on ModVal below. This check is equivalent to `Log2(Count) <
- // BEWidth`.
- if (static_cast<uint64_t>(Count) > (1ULL << BEWidth))
+ // comment on ModVal below.
+ if (Log2_32(Count) > BEWidth)
return false;
// If this loop is nested, then the loop unroller changes the code in
@@ -333,18 +339,15 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
BasicBlock *PH = L->getLoopPreheader();
- BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
  // It helps to split the original preheader twice, once for the end of the
  // prolog code and once for a new loop preheader
BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
- const DataLayout &DL = Header->getModule()->getDataLayout();
// Compute the number of extra iterations required, which is:
// extra iterations = run-time trip count % (loop unroll factor + 1)
- SCEVExpander Expander(*SE, DL, "loop-unroll");
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
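
Two things change in this file: the SCEVExpander is created early so the new isHighCostExpansion check can gate runtime unrolling, and the overflow guard is rewritten. Once Count is known to be a power of two, the new guard matches the old shift-based form but stays defined when BEWidth is 64 or larger. A small worked sketch under that power-of-two assumption (Log2_32 comes from llvm/Support/MathExtras.h):

    // For Count == 2^k the guard is "k > BEWidth", i.e. Count > 2^BEWidth,
    // matching the old "Count > (1ULL << BEWidth)" test without the shift
    // that is undefined for BEWidth >= 64.
    //   Count = 8, BEWidth = 2: Log2_32(8) = 3 > 2  -> reject
    //   Count = 4, BEWidth = 2: Log2_32(4) = 2 <= 2 -> keep going
    bool Overflows = Log2_32(Count) > BEWidth;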
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 35c701e..014574d 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -93,3 +94,13 @@ llvm::collectUsedGlobalVariables(Module &M, SmallPtrSetImpl<GlobalValue *> &Set,
}
return GV;
}
+
+Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
+ if (isa<Function>(FuncOrBitcast))
+ return cast<Function>(FuncOrBitcast);
+ FuncOrBitcast->dump();
+ std::string Err;
+ raw_string_ostream Stream(Err);
+ Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
+ report_fatal_error(Err);
+}
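
checkSanitizerInterfaceFunction gives the sanitizer passes one shared way to assert that a runtime entry point they declare has not been redefined with a conflicting type. A hedged usage sketch; the "__asan_init" name and the M/IRB variables are illustrative only, not part of this patch:

    // Declare (or look up) a sanitizer runtime function; report_fatal_error
    // fires if the module already defines it with a mismatched signature.
    Function *AsanInit = checkSanitizerInterfaceFunction(
        M.getOrInsertFunction("__asan_init", IRB.getVoidTy(), nullptr));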
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 4b34b19..54e1733 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -872,8 +872,10 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
}
SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
- SmallPtrSet<DomTreeNode *, 32> Visited;
SmallVector<DomTreeNode *, 32> Worklist;
+ SmallPtrSet<DomTreeNode *, 32> VisitedPQ;
+ SmallPtrSet<DomTreeNode *, 32> VisitedWorklist;
+
while (!PQ.empty()) {
DomTreeNodePair RootPair = PQ.top();
PQ.pop();
@@ -887,6 +889,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
Worklist.clear();
Worklist.push_back(Root);
+ VisitedWorklist.insert(Root);
while (!Worklist.empty()) {
DomTreeNode *Node = Worklist.pop_back_val();
@@ -905,7 +908,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
if (SuccLevel > RootLevel)
continue;
- if (!Visited.insert(SuccNode).second)
+ if (!VisitedPQ.insert(SuccNode).second)
continue;
BasicBlock *SuccBB = SuccNode->getBlock();
@@ -919,7 +922,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
++CI) {
- if (!Visited.count(*CI))
+ if (VisitedWorklist.insert(*CI).second)
Worklist.push_back(*CI);
}
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index c7c0ca6..7c239cb 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1502,7 +1502,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (isa<DbgInfoIntrinsic>(I))
continue;
- // Only speculatively execution a single instruction (not counting the
+ // Only speculatively execute a single instruction (not counting the
// terminator) for now.
++SpeculationCost;
if (SpeculationCost > 1)
@@ -3884,8 +3884,8 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
"switch.tableidx.zext");
Value *GEPIndices[] = { Builder.getInt32(0), Index };
- Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
- "switch.gep");
+ Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
+ GEPIndices, "switch.gep");
return Builder.CreateLoad(GEP, "switch.load");
}
}
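
The GEP change above, and the many similar ones in the files that follow, are one API migration: IRBuilder's CreateGEP/CreateInBoundsGEP now take the source element type as an explicit leading argument. A short sketch of the two forms this patch uses (variable names are illustrative):

    // Element type passed explicitly, as in the switch-table lookup above.
    Value *Slot = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
                                            GEPIndices, "switch.gep");
    // Passing nullptr lets IRBuilder recover the element type from the
    // pointer operand, as the LoopVectorize call sites below do.
    Value *Next = Builder.CreateGEP(nullptr, BasePtr, Offset);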
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 5867d65..42102e7 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -222,7 +222,7 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
// Now that we have the destination's length, we must index into the
// destination's pointer to get the actual memcpy destination (end of
// the string .. we're concatenating).
- Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+ Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
@@ -303,7 +303,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
- return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
return nullptr;
}
@@ -316,7 +316,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
return Constant::getNullValue(CI->getType());
// strchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
}
Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
@@ -351,7 +351,7 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
return Constant::getNullValue(CI->getType());
// strrchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
}
Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
@@ -476,7 +476,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+ return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
}
// See if we can get the length of the input string.
@@ -487,7 +487,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
Type *PT = FT->getParamType(0);
Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
Value *DstEnd =
- B.CreateGEP(Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
+ B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
@@ -597,7 +597,7 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
if (I == StringRef::npos) // No match.
return Constant::getNullValue(CI->getType());
- return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+ return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), "strpbrk");
}
// strpbrk(s, "a") -> strchr(s, 'a')
@@ -828,7 +828,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
return Constant::getNullValue(CI->getType());
// memchr(s+n,c,l) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "memchr");
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
}
Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
@@ -1671,7 +1671,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+ Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
B.CreateStore(B.getInt8(0), Ptr);
return ConstantInt::get(CI->getType(), 1);
@@ -2276,7 +2276,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// __stpcpy_chk(x,x,...) -> x+strlen(x)
if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+ return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
}
// If a) we don't have any length information, or b) we know this will
@@ -2284,25 +2284,25 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// st[rp]cpy_chk call which may fail at runtime if the size is too long.
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
- if (isFortifiedCallFoldable(CI, 2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
- return Ret;
- } else if (!OnlyLowerUnknownSize) {
- // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0)
- return nullptr;
+ if (isFortifiedCallFoldable(CI, 2, 1, true))
+ return EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
- Type *SizeTTy = DL.getIntPtrType(CI->getContext());
- Value *LenV = ConstantInt::get(SizeTTy, Len);
- Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
- // If the function was an __stpcpy_chk, and we were able to fold it into
- // a __memcpy_chk, we still need to return the correct end pointer.
- if (Ret && Func == LibFunc::stpcpy_chk)
- return B.CreateGEP(Dst, ConstantInt::get(SizeTTy, Len - 1));
- return Ret;
- }
- return nullptr;
+ if (OnlyLowerUnknownSize)
+ return nullptr;
+
+  // Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
+
+ Type *SizeTTy = DL.getIntPtrType(CI->getContext());
+ Value *LenV = ConstantInt::get(SizeTTy, Len);
+ Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+ // If the function was an __stpcpy_chk, and we were able to fold it into
+ // a __memcpy_chk, we still need to return the correct end pointer.
+ if (Ret && Func == LibFunc::stpcpy_chk)
+ return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
+ return Ret;
}
Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
@@ -2322,8 +2322,18 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
}
Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
- if (CI->isNoBuiltin())
- return nullptr;
+ // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
+ // Some clang users checked for _chk libcall availability using:
+ // __has_builtin(__builtin___memcpy_chk)
+ // When compiling with -fno-builtin, this is always true.
+ // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
+ // end up with fortified libcalls, which isn't acceptable in a freestanding
+  // environment that only provides their non-fortified counterparts.
+ //
+ // Until we change clang and/or teach external users to check for availability
+ // differently, disregard the "nobuiltin" attribute and TLI::has.
+ //
+ // PR23093.
LibFunc::Func Func;
Function *Callee = CI->getCalledFunction();
@@ -2332,7 +2342,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
// First, check that this is a known library functions.
- if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func))
+ if (!TLI->getLibFunc(FuncName, Func))
return nullptr;
// We never change the calling convention.
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index b7d0ae4..8986932 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -93,6 +93,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <map>
#include <tuple>
@@ -503,8 +504,7 @@ static std::string getDebugLocString(const Loop *L) {
std::string Result;
if (L) {
raw_string_ostream OS(Result);
- const DebugLoc LoopDbgLoc = L->getStartLoc();
- if (!LoopDbgLoc.isUnknown())
+ if (const DebugLoc LoopDbgLoc = L->getStartLoc())
LoopDbgLoc.print(OS);
else
// Just print the module name.
@@ -686,7 +686,7 @@ public:
Index = B.CreateNeg(Index);
else if (!StepValue->isOne())
Index = B.CreateMul(Index, StepValue);
- return B.CreateGEP(StartValue, Index);
+ return B.CreateGEP(nullptr, StartValue, Index);
case IK_NoInduction:
return nullptr;
@@ -1839,7 +1839,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
- Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+ Value *PartPtr =
+ Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
if (Reverse) {
// If we store to reverse consecutive memory locations then we need
@@ -1847,8 +1848,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
StoredVal[Part] = reverseVector(StoredVal[Part]);
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
- PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
- PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+ PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+ PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
Mask[Part] = reverseVector(Mask[Part]);
}
@@ -1871,13 +1872,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
setDebugLocFromInst(Builder, LI);
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
- Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+ Value *PartPtr =
+ Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
if (Reverse) {
// If the address is consecutive but reversed, then the
// wide load needs to start at the last vector element.
- PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
- PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+ PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+ PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
Mask[Part] = reverseVector(Mask[Part]);
}
@@ -4007,6 +4009,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (!LAI->canVectorizeMemory())
return false;
+ if (LAI->hasStoreToLoopInvariantAddress()) {
+ emitAnalysis(
+ VectorizationReport()
+ << "write to a loop invariant address could not be vectorized");
+ DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+ return false;
+ }
+
if (LAI->getNumRuntimePointerChecks() >
VectorizerParams::RuntimeMemoryCheckThreshold) {
emitAnalysis(VectorizationReport()
@@ -4307,32 +4317,31 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
}
}
-LoopVectorizationLegality::InductionKind
-LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
- ConstantInt *&StepValue) {
+bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+ ConstantInt *&StepValue) {
Type *PhiTy = Phi->getType();
  // We only handle integer and pointer induction variables.
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
- return IK_NoInduction;
+ return false;
// Check that the PHI is consecutive.
const SCEV *PhiScev = SE->getSCEV(Phi);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
if (!AR) {
DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
- return IK_NoInduction;
+ return false;
}
const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C)
- return IK_NoInduction;
+ return false;
ConstantInt *CV = C->getValue();
if (PhiTy->isIntegerTy()) {
StepValue = CV;
- return IK_IntInduction;
+ return true;
}
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
@@ -4340,14 +4349,28 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
// The pointer stride cannot be determined if the pointer element type is not
// sized.
if (!PointerElementType->isSized())
- return IK_NoInduction;
+ return false;
const DataLayout &DL = Phi->getModule()->getDataLayout();
int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
int64_t CVSize = CV->getSExtValue();
if (CVSize % Size)
- return IK_NoInduction;
+ return false;
StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
+ return true;
+}
+
+LoopVectorizationLegality::InductionKind
+LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
+ ConstantInt *&StepValue) {
+ if (!isInductionPHI(Phi, SE, StepValue))
+ return IK_NoInduction;
+
+ Type *PhiTy = Phi->getType();
+  // Found an integer induction variable.
+ if (PhiTy->isIntegerTy())
+ return IK_IntInduction;
+  // Found a pointer induction variable.
return IK_PtrInduction;
}
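
The induction check is now exposed as the free function llvm::isInductionPHI, with isInductionVariable reduced to a thin wrapper that maps the result back onto InductionKind. A minimal caller sketch, assuming a PHINode *Phi and a ScalarEvolution *SE are in scope:

    ConstantInt *StepValue = nullptr;
    if (isInductionPHI(Phi, SE, StepValue)) {
      // Phi is an integer or pointer induction. For pointer inductions the
      // step has already been divided by the element size, as above.
    }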
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8fc4cc1..7267f58 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1183,7 +1183,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::ICmp:
case Instruction::FCmp: {
// Check that all of the compares have the same predicate.
- CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+ CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType();
for (unsigned i = 1, e = VL.size(); i < e; ++i) {
CmpInst *Cmp = cast<CmpInst>(VL[i]);
@@ -2202,7 +2202,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (Value *V = alreadyVectorized(E->Scalars))
return V;
- CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+ CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V;
if (Opcode == Instruction::FCmp)
V = Builder.CreateFCmp(P0, L, R);
@@ -3101,9 +3101,7 @@ struct SLPVectorizer : public FunctionPass {
// delete instructions.
// Scan the blocks in the function in post order.
- for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
- e = po_end(&F.getEntryBlock()); it != e; ++it) {
- BasicBlock *BB = *it;
+ for (auto BB : post_order(&F.getEntryBlock())) {
// Vectorize trees that end at stores.
if (unsigned count = collectStores(BB, R)) {
(void)count;
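
The SLPVectorizer loop above trades an explicit po_iterator begin/end pair for the post_order() range helper from llvm/ADT/PostOrderIterator.h. The same shape works for any graph with GraphTraits; for a function's CFG it is simply:

    for (BasicBlock *BB : post_order(&F.getEntryBlock())) {
      // BB is visited after its successors in the depth-first spanning tree.
    }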
diff --git a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
index eb05e1e..98161bf 100644
--- a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
+++ b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
@@ -26,7 +26,7 @@ no_exit: ; preds = %no_exit, %entry
loopexit: ; preds = %no_exit, %entry
%Y.0.1 = phi i32 [ 0, %entry ], [ %tmp.13, %no_exit ] ; <i32> [#uses=1]
%tmp.4 = getelementptr [3 x [3 x i32]], [3 x [3 x i32]]* %X, i32 0, i32 0 ; <[3 x i32]*> [#uses=1]
- %tmp.15 = call i32 (...)* @foo( [3 x i32]* %tmp.4, i32 %Y.0.1 ) ; <i32> [#uses=0]
+ %tmp.15 = call i32 (...) @foo( [3 x i32]* %tmp.4, i32 %Y.0.1 ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/Analysis/BasicAA/2008-04-15-Byval.ll b/test/Analysis/BasicAA/2008-04-15-Byval.ll
index 9df12bd..9d4fd14 100644
--- a/test/Analysis/BasicAA/2008-04-15-Byval.ll
+++ b/test/Analysis/BasicAA/2008-04-15-Byval.ll
@@ -10,7 +10,7 @@ entry:
%tmp = getelementptr %struct.x, %struct.x* %X, i32 0, i32 0 ; <[4 x i32]*> [#uses=1]
%tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3 ; <i32*> [#uses=1]
store i32 2, i32* %tmp1, align 4
- %tmp2 = call i32 (...)* @bar( %struct.x* byval align 4 %X ) nounwind ; <i32> [#uses=0]
+ %tmp2 = call i32 (...) @bar( %struct.x* byval align 4 %X ) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
ret void
diff --git a/test/Analysis/BasicAA/byval.ll b/test/Analysis/BasicAA/byval.ll
index edbe7b3..4f90c3f 100644
--- a/test/Analysis/BasicAA/byval.ll
+++ b/test/Analysis/BasicAA/byval.ll
@@ -6,7 +6,7 @@ target triple = "i686-apple-darwin8"
define i32 @foo(%struct.x* byval %a) nounwind {
; CHECK: ret i32 1
- %tmp1 = tail call i32 (...)* @bar( %struct.x* %a ) nounwind ; <i32> [#uses=0]
+ %tmp1 = tail call i32 (...) @bar( %struct.x* %a ) nounwind ; <i32> [#uses=0]
%tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0 ; <i32*> [#uses=2]
store i32 1, i32* %tmp2, align 4
store i32 2, i32* @g, align 4
diff --git a/test/Analysis/BlockFrequencyInfo/bad_input.ll b/test/Analysis/BlockFrequencyInfo/bad_input.ll
index da62dca..e5b1f50 100644
--- a/test/Analysis/BlockFrequencyInfo/bad_input.ll
+++ b/test/Analysis/BlockFrequencyInfo/bad_input.ll
@@ -32,7 +32,8 @@ define void @infinite_loop(i1 %x) {
entry:
br i1 %x, label %for.body, label %for.end, !prof !1
-; Check that the loop scale maxes out at 4096, giving 2048 here.
+; Check that the infinite loop is arbitrarily scaled to max out at 4096,
+; giving 2048 here.
; CHECK-NEXT: for.body: float = 2048.0,
for.body:
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
diff --git a/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll b/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll
new file mode 100644
index 0000000..534c4ad
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll
@@ -0,0 +1,204 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; This code contains three loops. One is triple-nested, the
+; second is double-nested and the third is a single loop. At
+; runtime, all three loops execute 1,000,000 times each. We used to
+; give different frequencies to each of the loops because loop
+; scales were limited to no more than 4,096.
+;
+; This was penalizing the hotness of the second and third loops
+; because BFI was reducing the loop scale for for.cond16 and
+; for.cond26 to a max of 4,096.
+;
+; Without this restriction, all loops are now correctly given the same
+; frequency values.
+;
+; Original C code:
+;
+;
+; int g;
+; __attribute__((noinline)) void bar() {
+; g++;
+; }
+;
+; extern int printf(const char*, ...);
+;
+; int main()
+; {
+; int i, j, k;
+;
+; g = 0;
+; for (i = 0; i < 100; i++)
+; for (j = 0; j < 100; j++)
+; for (k = 0; k < 100; k++)
+; bar();
+;
+; printf ("g = %d\n", g);
+; g = 0;
+;
+; for (i = 0; i < 100; i++)
+; for (j = 0; j < 10000; j++)
+; bar();
+;
+; printf ("g = %d\n", g);
+; g = 0;
+;
+;
+; for (i = 0; i < 1000000; i++)
+; bar();
+;
+; printf ("g = %d\n", g);
+; g = 0;
+; }
+
+@g = common global i32 0, align 4
+@.str = private unnamed_addr constant [8 x i8] c"g = %d\0A\00", align 1
+
+declare void @bar()
+declare i32 @printf(i8*, ...)
+
+; CHECK: Printing analysis {{.*}} for function 'main':
+; CHECK-NEXT: block-frequency-info: main
+define i32 @main() {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ %k = alloca i32, align 4
+ store i32 0, i32* %retval
+ store i32 0, i32* @g, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc10, %entry
+ %0 = load i32, i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end12, !prof !1
+
+for.body: ; preds = %for.cond
+ store i32 0, i32* %j, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc7, %for.body
+ %1 = load i32, i32* %j, align 4
+ %cmp2 = icmp slt i32 %1, 100
+ br i1 %cmp2, label %for.body3, label %for.end9, !prof !2
+
+for.body3: ; preds = %for.cond1
+ store i32 0, i32* %k, align 4
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc, %for.body3
+ %2 = load i32, i32* %k, align 4
+ %cmp5 = icmp slt i32 %2, 100
+ br i1 %cmp5, label %for.body6, label %for.end, !prof !3
+
+; CHECK: - for.body6: float = 500000.5, int = 4000003
+for.body6: ; preds = %for.cond4
+ call void @bar()
+ br label %for.inc
+
+for.inc: ; preds = %for.body6
+ %3 = load i32, i32* %k, align 4
+ %inc = add nsw i32 %3, 1
+ store i32 %inc, i32* %k, align 4
+ br label %for.cond4
+
+for.end: ; preds = %for.cond4
+ br label %for.inc7
+
+for.inc7: ; preds = %for.end
+ %4 = load i32, i32* %j, align 4
+ %inc8 = add nsw i32 %4, 1
+ store i32 %inc8, i32* %j, align 4
+ br label %for.cond1
+
+for.end9: ; preds = %for.cond1
+ br label %for.inc10
+
+for.inc10: ; preds = %for.end9
+ %5 = load i32, i32* %i, align 4
+ %inc11 = add nsw i32 %5, 1
+ store i32 %inc11, i32* %i, align 4
+ br label %for.cond
+
+for.end12: ; preds = %for.cond
+ %6 = load i32, i32* @g, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %6)
+ store i32 0, i32* @g, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond13
+
+for.cond13: ; preds = %for.inc22, %for.end12
+ %7 = load i32, i32* %i, align 4
+ %cmp14 = icmp slt i32 %7, 100
+ br i1 %cmp14, label %for.body15, label %for.end24, !prof !1
+
+for.body15: ; preds = %for.cond13
+ store i32 0, i32* %j, align 4
+ br label %for.cond16
+
+for.cond16: ; preds = %for.inc19, %for.body15
+ %8 = load i32, i32* %j, align 4
+ %cmp17 = icmp slt i32 %8, 10000
+ br i1 %cmp17, label %for.body18, label %for.end21, !prof !4
+
+; CHECK: - for.body18: float = 500000.5, int = 4000003
+for.body18: ; preds = %for.cond16
+ call void @bar()
+ br label %for.inc19
+
+for.inc19: ; preds = %for.body18
+ %9 = load i32, i32* %j, align 4
+ %inc20 = add nsw i32 %9, 1
+ store i32 %inc20, i32* %j, align 4
+ br label %for.cond16
+
+for.end21: ; preds = %for.cond16
+ br label %for.inc22
+
+for.inc22: ; preds = %for.end21
+ %10 = load i32, i32* %i, align 4
+ %inc23 = add nsw i32 %10, 1
+ store i32 %inc23, i32* %i, align 4
+ br label %for.cond13
+
+for.end24: ; preds = %for.cond13
+ %11 = load i32, i32* @g, align 4
+ %call25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %11)
+ store i32 0, i32* @g, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond26
+
+for.cond26: ; preds = %for.inc29, %for.end24
+ %12 = load i32, i32* %i, align 4
+ %cmp27 = icmp slt i32 %12, 1000000
+ br i1 %cmp27, label %for.body28, label %for.end31, !prof !5
+
+; CHECK: - for.body28: float = 500000.5, int = 4000003
+for.body28: ; preds = %for.cond26
+ call void @bar()
+ br label %for.inc29
+
+for.inc29: ; preds = %for.body28
+ %13 = load i32, i32* %i, align 4
+ %inc30 = add nsw i32 %13, 1
+ store i32 %inc30, i32* %i, align 4
+ br label %for.cond26
+
+for.end31: ; preds = %for.cond26
+ %14 = load i32, i32* @g, align 4
+ %call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %14)
+ store i32 0, i32* @g, align 4
+ %15 = load i32, i32* %retval
+ ret i32 %15
+}
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"}
+!1 = !{!"branch_weights", i32 101, i32 2}
+!2 = !{!"branch_weights", i32 10001, i32 101}
+!3 = !{!"branch_weights", i32 1000001, i32 10001}
+!4 = !{!"branch_weights", i32 1000001, i32 101}
+!5 = !{!"branch_weights", i32 1000001, i32 2}
diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll
index 0f66911..2c9c156 100644
--- a/test/Analysis/BranchProbabilityInfo/basic.ll
+++ b/test/Analysis/BranchProbabilityInfo/basic.ll
@@ -212,3 +212,31 @@ exit:
ret i32 %result
}
+define i32 @zero3(i32 %i, i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'zero3'
+entry:
+; AND'ing with a single-bit bitmask essentially leads to a bool comparison,
+; meaning we don't have probability information.
+ %and = and i32 %i, 2
+ %tobool = icmp eq i32 %and, 0
+ br i1 %tobool, label %then, label %else
+; CHECK: edge entry -> then probability is 16 / 32
+; CHECK: edge entry -> else probability is 16 / 32
+
+then:
+; AND'ing with any other bitmask might be something else, so we still assume the
+; usual probabilities.
+ %and2 = and i32 %i, 5
+ %tobool2 = icmp eq i32 %and2, 0
+ br i1 %tobool2, label %else, label %exit
+; CHECK: edge then -> else probability is 12 / 32
+; CHECK: edge then -> exit probability is 20 / 32
+
+else:
+ br label %exit
+
+exit:
+ %result = phi i32 [ %a, %then ], [ %b, %else ]
+ ret i32 %result
+}
+
diff --git a/test/Analysis/CallGraph/2008-09-09-DirectCall.ll b/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
index 595cc42..56eac49 100644
--- a/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
+++ b/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
@@ -12,6 +12,6 @@ entry:
define void @caller() {
entry:
- call void (...)* @callee( void (...)* @callee )
+ call void (...) @callee( void (...)* @callee )
unreachable
}
diff --git a/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll b/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
new file mode 100644
index 0000000..9dd3d55
--- /dev/null
+++ b/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
@@ -0,0 +1,198 @@
+; RUN: opt %s -analyze -divergence | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
+define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'no_diverge'
+entry:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %cond = icmp slt i32 %n, 0
+ br i1 %cond, label %then, label %else ; uniform
+; CHECK-NOT: DIVERGENT: br i1 %cond,
+then:
+ %a1 = add i32 %a, %tid
+ br label %merge
+else:
+ %b2 = add i32 %b, %tid
+ br label %merge
+merge:
+ %c = phi i32 [ %a1, %then ], [ %b2, %else ]
+ ret i32 %c
+}
+
+; c = a;
+; if (threadIdx.x < 5) // divergent: data dependent
+; c = b;
+; return c; // c is divergent: sync dependent
+define i32 @sync(i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'sync'
+bb1:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ %cond = icmp slt i32 %tid, 5
+ br i1 %cond, label %bb2, label %bb3
+; CHECK: DIVERGENT: br i1 %cond,
+bb2:
+ br label %bb3
+bb3:
+ %c = phi i32 [ %a, %bb1 ], [ %b, %bb2 ] ; sync dependent on tid
+; CHECK: DIVERGENT: %c =
+ ret i32 %c
+}
+
+; c = 0;
+; if (threadIdx.x >= 5) { // divergent
+; c = (n < 0 ? a : b); // c here is uniform because n is uniform
+; }
+; // c here is divergent because it is sync dependent on threadIdx.x >= 5
+; return c;
+define i32 @mixed(i32 %n, i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'mixed'
+bb1:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ %cond = icmp slt i32 %tid, 5
+ br i1 %cond, label %bb6, label %bb2
+; CHECK: DIVERGENT: br i1 %cond,
+bb2:
+ %cond2 = icmp slt i32 %n, 0
+ br i1 %cond2, label %bb4, label %bb3
+bb3:
+ br label %bb5
+bb4:
+ br label %bb5
+bb5:
+ %c = phi i32 [ %a, %bb3 ], [ %b, %bb4 ]
+; CHECK-NOT: DIVERGENT: %c =
+ br label %bb6
+bb6:
+ %c2 = phi i32 [ 0, %bb1], [ %c, %bb5 ]
+; CHECK: DIVERGENT: %c2 =
+ ret i32 %c2
+}
+
+; We conservatively treat all parameters of a __device__ function as divergent.
+define i32 @device(i32 %n, i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'device'
+; CHECK: DIVERGENT: i32 %n
+; CHECK: DIVERGENT: i32 %a
+; CHECK: DIVERGENT: i32 %b
+entry:
+ %cond = icmp slt i32 %n, 0
+ br i1 %cond, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cond,
+then:
+ br label %merge
+else:
+ br label %merge
+merge:
+ %c = phi i32 [ %a, %then ], [ %b, %else ]
+ ret i32 %c
+}
+
+; int i = 0;
+; do {
+; i++; // i here is uniform
+; } while (i < laneid);
+; return i == 10 ? 0 : 1; // i here is divergent
+;
+; The i defined in the loop is used outside.
+define i32 @loop() {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'loop'
+entry:
+ %laneid = call i32 @llvm.ptx.read.laneid()
+ br label %loop
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
+; CHECK-NOT: DIVERGENT: %i =
+ %i1 = add i32 %i, 1
+ %exit_cond = icmp sge i32 %i1, %laneid
+ br i1 %exit_cond, label %loop_exit, label %loop
+loop_exit:
+ %cond = icmp eq i32 %i, 10
+ br i1 %cond, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cond,
+then:
+ ret i32 0
+else:
+ ret i32 1
+}
+
+; Same as @loop, but the loop is in the LCSSA form.
+define i32 @lcssa() {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'lcssa'
+entry:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ br label %loop
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
+; CHECK-NOT: DIVERGENT: %i =
+ %i1 = add i32 %i, 1
+ %exit_cond = icmp sge i32 %i1, %tid
+ br i1 %exit_cond, label %loop_exit, label %loop
+loop_exit:
+ %i.lcssa = phi i32 [ %i, %loop ]
+; CHECK: DIVERGENT: %i.lcssa =
+ %cond = icmp eq i32 %i.lcssa, 10
+ br i1 %cond, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cond,
+then:
+ ret i32 0
+else:
+ ret i32 1
+}
+
+; This test contains an unstructured loop.
+; +-------------- entry ----------------+
+; | |
+; V V
+; i1 = phi(0, i3) i2 = phi(0, i3)
+; j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2
+; ^ | ^
+; | V |
+; +-------- switch (tid / i3) ----------+
+; |
+; V
+; if (i3 == 5) // divergent
+; because sync dependent on (tid / i3).
+define i32 @unstructured_loop(i1 %entry_cond) {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'unstructured_loop'
+entry:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
+loop_entry_1:
+ %i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
+ %j1 = add i32 %i1, 1
+ br label %loop_body
+loop_entry_2:
+ %i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
+ %j2 = add i32 %i2, 2
+ br label %loop_body
+loop_body:
+ %i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ]
+ br label %loop_latch
+loop_latch:
+ %div = sdiv i32 %tid, %i3
+ switch i32 %div, label %branch [ i32 1, label %loop_entry_1
+ i32 2, label %loop_entry_2 ]
+branch:
+ %cmp = icmp eq i32 %i3, 5
+ br i1 %cmp, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cmp,
+then:
+ ret i32 0
+else:
+ ret i32 1
+}
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+declare i32 @llvm.ptx.read.laneid()
+
+!nvvm.annotations = !{!0, !1, !2, !3, !4}
+!0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1}
+!1 = !{i32 (i32, i32)* @sync, !"kernel", i32 1}
+!2 = !{i32 (i32, i32, i32)* @mixed, !"kernel", i32 1}
+!3 = !{i32 ()* @loop, !"kernel", i32 1}
+!4 = !{i32 (i1)* @unstructured_loop, !"kernel", i32 1}
diff --git a/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg b/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg
new file mode 100644
index 0000000..2cb98eb
--- /dev/null
+++ b/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'NVPTX' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Analysis/GlobalsModRef/volatile-instrs.ll b/test/Analysis/GlobalsModRef/volatile-instrs.ll
index a331bf3..5dd47bc 100644
--- a/test/Analysis/GlobalsModRef/volatile-instrs.ll
+++ b/test/Analysis/GlobalsModRef/volatile-instrs.ll
@@ -25,6 +25,6 @@ main_entry:
%0 = load volatile i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0), align 4
store i32 %0, i32* @c, align 4
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) nounwind
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind
ret i32 0
}
diff --git a/test/Analysis/LazyCallGraph/basic.ll b/test/Analysis/LazyCallGraph/basic.ll
index 6e2cb90..7c13d2b 100644
--- a/test/Analysis/LazyCallGraph/basic.ll
+++ b/test/Analysis/LazyCallGraph/basic.ll
@@ -90,7 +90,7 @@ next:
select i1 true, void ()* @f3, void ()* @f4
store void ()* @f5, void ()** %x
call void @f6()
- call void (void ()*, void ()*)* bitcast (void ()* @f7 to void (void ()*, void ()*)*)(void ()* @f8, void ()* @f9)
+ call void (void ()*, void ()*) bitcast (void ()* @f7 to void (void ()*, void ()*)*)(void ()* @f8, void ()* @f9)
invoke void @f10() to label %exit unwind label %unwind
exit:
diff --git a/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll b/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll
index 238f3f4..5d4fb7d 100644
--- a/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll
+++ b/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll
@@ -14,7 +14,6 @@ target triple = "x86_64-apple-macosx10.10.0"
; CHECK: Report: unsafe dependent memory operations in loop
; CHECK-NOT: Memory dependences are safe
-@n = global i32 20, align 4
@B = common global i32* null, align 8
@A = common global i32* null, align 8
diff --git a/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll b/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll
new file mode 100644
index 0000000..fa70c02
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll
@@ -0,0 +1,43 @@
+; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s
+
+; If the arrays don't alias, this loop is safe with no memchecks:
+; for (i = 0; i < n; i++)
+; A[i] = A[i+1] * B[i] * C[i];
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; CHECK: Memory dependences are safe{{$}}
+
+define void @f(i16* noalias %a,
+ i16* noalias %b,
+ i16* noalias %c) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+ %add = add nuw nsw i64 %ind, 1
+
+ %arrayidxA_plus_2 = getelementptr inbounds i16, i16* %a, i64 %add
+ %loadA_plus_2 = load i16, i16* %arrayidxA_plus_2, align 2
+
+ %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
+ %loadB = load i16, i16* %arrayidxB, align 2
+
+ %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %ind
+ %loadC = load i16, i16* %arrayidxC, align 2
+
+ %mul = mul i16 %loadB, %loadA_plus_2
+ %mul1 = mul i16 %mul, %loadC
+
+ %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
+ store i16 %mul1, i16* %arrayidxA, align 2
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll b/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll
new file mode 100644
index 0000000..8ab8ab2
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
+
+; Test to confirm LAA will find store to invariant address.
+; Inner loop has a store to invariant address.
+;
+; for(; i < itr; i++) {
+; for(; j < itr; j++) {
+; var1[i] = var2[j] + var1[i];
+; }
+; }
+
+; CHECK: Store to invariant address was found in loop.
+; CHECK-NOT: Store to invariant address was not found in loop.
+
+define i32 @foo(i32* nocapture %var1, i32* nocapture readonly %var2, i32 %itr) #0 {
+entry:
+ %cmp20 = icmp eq i32 %itr, 0
+ br i1 %cmp20, label %for.end10, label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
+ %j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
+ %cmp218 = icmp ult i32 %j.022, %itr
+ br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23
+ %0 = zext i32 %j.022 to i64
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.body3.lr.ph
+ %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx, align 4
+ %2 = load i32, i32* %arrayidx5, align 4
+ %add = add nsw i32 %2, %1
+ store i32 %add, i32* %arrayidx5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %itr
+ br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8: ; preds = %for.body3, %for.cond1.preheader
+ %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
+ %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+ %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
+ %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
+ br i1 %exitcond26, label %for.end10, label %for.cond1.preheader
+
+for.end10: ; preds = %for.inc8, %entry
+ ret i32 undef
+}
+
diff --git a/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll b/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll
new file mode 100644
index 0000000..4da0906
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
+
+; Test to confirm LAA will not find store to invariant address.
+; Inner loop has no store to invariant address.
+;
+; for(; i < itr; i++) {
+; for(; j < itr; j++) {
+; var2[j] = var2[j] + var1[i];
+; }
+; }
+
+; CHECK: Store to invariant address was not found in loop.
+; CHECK-NOT: Store to invariant address was found in loop.
+
+
+define i32 @foo(i32* nocapture readonly %var1, i32* nocapture %var2, i32 %itr) #0 {
+entry:
+ %cmp20 = icmp eq i32 %itr, 0
+ br i1 %cmp20, label %for.end10, label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
+ %j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
+ %cmp218 = icmp ult i32 %j.022, %itr
+ br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23
+ %0 = zext i32 %j.022 to i64
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.body3.lr.ph
+ %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx, align 4
+ %2 = load i32, i32* %arrayidx5, align 4
+ %add = add nsw i32 %2, %1
+ store i32 %add, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %itr
+ br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8: ; preds = %for.body3, %for.cond1.preheader
+ %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
+ %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+ %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
+ %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
+ br i1 %exitcond26, label %for.end10, label %for.cond1.preheader
+
+for.end10: ; preds = %for.inc8, %entry
+ ret i32 undef
+}
+
diff --git a/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll b/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll
new file mode 100644
index 0000000..18315a5
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
+
+; Test to confirm LAA will find store to invariant address.
+; Inner loop has a store to invariant address.
+;
+; for(; i < itr; i++) {
+; for(; j < itr; j++) {
+; var1[j] = ++var2[i] + var1[j];
+; }
+; }
+
+; CHECK: Store to invariant address was found in loop.
+
+define void @foo(i32* nocapture %var1, i32* nocapture %var2, i32 %itr) #0 {
+entry:
+ %cmp20 = icmp sgt i32 %itr, 0
+ br i1 %cmp20, label %for.cond1.preheader, label %for.end11
+
+for.cond1.preheader: ; preds = %entry, %for.inc9
+ %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc9 ], [ 0, %entry ]
+ %j.022 = phi i32 [ %j.1.lcssa, %for.inc9 ], [ 0, %entry ]
+ %cmp218 = icmp slt i32 %j.022, %itr
+ br i1 %cmp218, label %for.body3.lr.ph, label %for.inc9
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv23
+ %0 = sext i32 %j.022 to i64
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.body3.lr.ph
+ %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+ %1 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx5, align 4
+ %add = add nsw i32 %inc, %2
+ store i32 %add, i32* %arrayidx5, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %itr
+ br i1 %exitcond, label %for.inc9, label %for.body3
+
+for.inc9: ; preds = %for.body3, %for.cond1.preheader
+ %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
+ %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+ %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
+ %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
+ br i1 %exitcond26, label %for.end11, label %for.cond1.preheader
+
+for.end11: ; preds = %for.inc9, %entry
+ ret void
+}
diff --git a/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll b/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
index a11fd7f..ce8b86b 100644
--- a/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
+++ b/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
@@ -20,7 +20,6 @@ target triple = "x86_64-apple-macosx10.10.0"
; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16, i16* %a, i64 %add
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %storemerge3
-@n = global i32 20, align 4
@B = common global i16* null, align 8
@A = common global i16* null, align 8
@C = common global i16* null, align 8
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
index 46c6c59..84561c5 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
@@ -13,7 +13,7 @@ bb.nph: ; preds = %entry
bb: ; preds = %bb.nph, %bb1
%indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb1 ] ; <i32> [#uses=2]
%argc_addr.04 = add i32 %indvar, %argc ; <i32> [#uses=1]
- tail call void (...)* @Test() nounwind
+ tail call void (...) @Test() nounwind
%1 = add i32 %argc_addr.04, 1 ; <i32> [#uses=1]
br label %bb1
diff --git a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
index c4a4c30..33fcbab 100644
--- a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
+++ b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
@@ -14,7 +14,7 @@ entry:
br label %lbl_818
lbl_818: ; preds = %for.end, %entry
- call void (...)* @func_27()
+ call void (...) @func_27()
store i32 0, i32* @g_814, align 4
br label %for.cond
diff --git a/test/Analysis/ScalarEvolution/latch-dominating-conditions.ll b/test/Analysis/ScalarEvolution/latch-dominating-conditions.ll
new file mode 100644
index 0000000..3f6f958
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/latch-dominating-conditions.ll
@@ -0,0 +1,55 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+declare void @side_effect(i1)
+
+define void @latch_dominating_0(i8 %start) {
+; CHECK-LABEL: latch_dominating_0
+ entry:
+ %e = icmp slt i8 %start, 42
+ br i1 %e, label %loop, label %exit
+
+ loop:
+; CHECK-LABEL: loop
+ %idx = phi i8 [ %start, %entry ], [ %idx.inc, %be ]
+ %idx.inc = add i8 %idx, 1
+ %folds.to.true = icmp slt i8 %idx, 42
+; CHECK: call void @side_effect(i1 true)
+ call void @side_effect(i1 %folds.to.true)
+ %c0 = icmp slt i8 %idx.inc, 42
+ br i1 %c0, label %be, label %exit
+
+ be:
+; CHECK: call void @side_effect(i1 true)
+ call void @side_effect(i1 %folds.to.true)
+ %c1 = icmp slt i8 %idx.inc, 100
+ br i1 %c1, label %loop, label %exit
+
+ exit:
+ ret void
+}
+
+define void @latch_dominating_1(i8 %start) {
+; CHECK-LABEL: latch_dominating_1
+ entry:
+ %e = icmp slt i8 %start, 42
+ br i1 %e, label %loop, label %exit
+
+ loop:
+; CHECK-LABEL: loop
+ %idx = phi i8 [ %start, %entry ], [ %idx.inc, %be ]
+ %idx.inc = add i8 %idx, 1
+ %does.not.fold.to.true = icmp slt i8 %idx, 42
+; CHECK: call void @side_effect(i1 %does.not.fold.to.true)
+ call void @side_effect(i1 %does.not.fold.to.true)
+ %c0 = icmp slt i8 %idx.inc, 42
+ br i1 %c0, label %be, label %be
+
+ be:
+; CHECK: call void @side_effect(i1 %does.not.fold.to.true)
+ call void @side_effect(i1 %does.not.fold.to.true)
+ %c1 = icmp slt i8 %idx.inc, 100
+ br i1 %c1, label %loop, label %exit
+
+ exit:
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll
index 72560c7..614e9b2 100644
--- a/test/Analysis/ScalarEvolution/max-trip-count.ll
+++ b/test/Analysis/ScalarEvolution/max-trip-count.ll
@@ -65,7 +65,7 @@ for.inc: ; preds = %for.body
br label %for.cond
for.end: ; preds = %for.body, %for.cond
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %g_4.0) nounwind ; <i32> [#uses=0]
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %g_4.0) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/Analysis/ScalarEvolution/zext-signed-addrec.ll b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll
index 31ebb3e..2b12b33 100644
--- a/test/Analysis/ScalarEvolution/zext-signed-addrec.ll
+++ b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll
@@ -63,7 +63,7 @@ for.cond.for.end9_crit_edge: ; preds = %for.inc8
for.end9: ; preds = %entry.for.end9_crit_edge, %for.cond.for.end9_crit_edge
%3 = phi i32 [ %.pre, %entry.for.end9_crit_edge ], [ %shl, %for.cond.for.end9_crit_edge ]
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %3) #2
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %3) #2
br label %return
return.loopexit.split: ; preds = %for.cond1.preheader.lr.ph
diff --git a/test/Analysis/ValueTracking/memory-dereferenceable.ll b/test/Analysis/ValueTracking/memory-dereferenceable.ll
index 51f9265..bfee5c7 100644
--- a/test/Analysis/ValueTracking/memory-dereferenceable.ll
+++ b/test/Analysis/ValueTracking/memory-dereferenceable.ll
@@ -9,7 +9,7 @@ declare zeroext i1 @return_i1()
@globalstr = global [6 x i8] c"hello\00"
-define void @test(i32 addrspace(1)* dereferenceable(8) %dparam) {
+define void @test(i32 addrspace(1)* dereferenceable(8) %dparam) gc "statepoint-example" {
; CHECK: The following are dereferenceable:
; CHECK: %globalptr
; CHECK: %alloca
@@ -22,7 +22,7 @@ entry:
%alloca = alloca i1
%load2 = load i1, i1* %alloca
%load3 = load i32, i32 addrspace(1)* %dparam
- %tok = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
+ %tok = tail call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
%relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %tok, i32 4, i32 4)
%load4 = load i32, i32 addrspace(1)* %relocate
%nparam = getelementptr i32, i32 addrspace(1)* %dparam, i32 5
diff --git a/test/Assembler/2002-07-25-ReturnPtrFunction.ll b/test/Assembler/2002-07-25-ReturnPtrFunction.ll
index fdee93c..0fb9d55 100644
--- a/test/Assembler/2002-07-25-ReturnPtrFunction.ll
+++ b/test/Assembler/2002-07-25-ReturnPtrFunction.ll
@@ -7,7 +7,7 @@
declare void (i32)* @foo()
define void @test() {
- call void (i32)* ()* @foo( ) ; <%ty*>:1 [#uses=0]
+ call void (i32)* () @foo( ) ; <%ty*>:1 [#uses=0]
ret void
}
diff --git a/test/Assembler/2003-05-15-AssemblerProblem.ll b/test/Assembler/2003-05-15-AssemblerProblem.ll
index eba26a2..70c9617 100644
--- a/test/Assembler/2003-05-15-AssemblerProblem.ll
+++ b/test/Assembler/2003-05-15-AssemblerProblem.ll
@@ -4,12 +4,12 @@
; RUN: verify-uselistorder %s
define void @test() {
- call void (...)* bitcast (void (i16*, i32)* @AddString to void (...)*)( i16* null, i32 0 )
+ call void (...) bitcast (void (i16*, i32)* @AddString to void (...)*)( i16* null, i32 0 )
ret void
}
define void @AddString(i16* %tmp.124, i32 %tmp.127) {
- call void (...)* bitcast (void (i16*, i32)* @AddString to void (...)*)( i16* %tmp.124, i32 %tmp.127 )
+ call void (...) bitcast (void (i16*, i32)* @AddString to void (...)*)( i16* %tmp.124, i32 %tmp.127 )
ret void
}
diff --git a/test/Assembler/2008-01-11-VarargAttrs.ll b/test/Assembler/2008-01-11-VarargAttrs.ll
index 0b6592c..3111f2d 100644
--- a/test/Assembler/2008-01-11-VarargAttrs.ll
+++ b/test/Assembler/2008-01-11-VarargAttrs.ll
@@ -6,6 +6,6 @@
declare void @foo(...)
define void @bar() {
- call void (...)* @foo(%struct* byval null )
+ call void (...) @foo(%struct* byval null )
ret void
}
diff --git a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
index 2dd4743..24d1713 100644
--- a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
+++ b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
@@ -13,7 +13,7 @@ target triple = "x86_64-apple-darwin10.2"
define i32 @main() nounwind readonly {
%diff1 = alloca i64 ; <i64*> [#uses=2]
; CHECK: call void @llvm.dbg.value(metadata i64 72,
- call void @llvm.dbg.declare(metadata i64* %diff1, metadata !0, metadata !MDExpression())
+ call void @llvm.dbg.declare(metadata i64* %diff1, metadata !0, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
store i64 72, i64* %diff1, align 8
%v1 = load %struct.test*, %struct.test** @TestArrayPtr, align 8 ; <%struct.test*> [#uses=1]
%v2 = ptrtoint %struct.test* %v1 to i64 ; <i64> [#uses=1]
diff --git a/test/Assembler/debug-info.ll b/test/Assembler/debug-info.ll
index 7103ed2..417c479 100644
--- a/test/Assembler/debug-info.ll
+++ b/test/Assembler/debug-info.ll
@@ -28,11 +28,11 @@
!9 = !MDBasicType()
!10 = !MDBasicType(tag: DW_TAG_base_type, name: "", size: 0, align: 0, encoding: 0)
-; CHECK-NEXT: !9 = distinct !{}
+; CHECK-NEXT: !9 = !MDTemplateTypeParameter(type: !6)
; CHECK-NEXT: !10 = !MDFile(filename: "path/to/file", directory: "/path/to/dir")
; CHECK-NEXT: !11 = distinct !{}
; CHECK-NEXT: !12 = !MDFile(filename: "", directory: "")
-!11 = distinct !{}
+!11 = !MDTemplateTypeParameter(type: !7)
!12 = !MDFile(filename: "path/to/file", directory: "/path/to/dir")
!13 = distinct !{}
!14 = !MDFile(filename: "", directory: "")
@@ -44,7 +44,7 @@
; CHECK-NEXT: !15 = distinct !MDCompositeType(tag: DW_TAG_structure_type, name: "Base", scope: !14, file: !10, line: 3, size: 128, align: 32, offset: 64, flags: DIFlagPublic, elements: !16, runtimeLang: DW_LANG_C_plus_plus_11, vtableHolder: !15, templateParams: !18, identifier: "MangledBase")
; CHECK-NEXT: !16 = !{!17}
; CHECK-NEXT: !17 = !MDDerivedType(tag: DW_TAG_member, name: "field", scope: !15, file: !10, line: 4, baseType: !6, size: 32, align: 32, offset: 32, flags: DIFlagPublic)
-; CHECK-NEXT: !18 = !{!6}
+; CHECK-NEXT: !18 = !{!9}
; CHECK-NEXT: !19 = !MDCompositeType(tag: DW_TAG_structure_type, name: "Derived", scope: !14, file: !10, line: 3, baseType: !15, size: 128, align: 32, offset: 64, flags: DIFlagPublic, elements: !20, runtimeLang: DW_LANG_C_plus_plus_11, vtableHolder: !15, templateParams: !18, identifier: "MangledBase")
; CHECK-NEXT: !20 = !{!21}
; CHECK-NEXT: !21 = !MDDerivedType(tag: DW_TAG_inheritance, scope: !19, baseType: !15)
@@ -55,7 +55,7 @@
!17 = !MDCompositeType(tag: DW_TAG_structure_type, name: "Base", scope: !16, file: !12, line: 3, size: 128, align: 32, offset: 64, flags: DIFlagPublic, elements: !18, runtimeLang: DW_LANG_C_plus_plus_11, vtableHolder: !17, templateParams: !20, identifier: "MangledBase")
!18 = !{!19}
!19 = !MDDerivedType(tag: DW_TAG_member, name: "field", scope: !17, file: !12, line: 4, baseType: !7, size: 32, align: 32, offset: 32, flags: DIFlagPublic)
-!20 = !{!7}
+!20 = !{!11}
!21 = !MDCompositeType(tag: DW_TAG_structure_type, name: "Derived", scope: !16, file: !12, line: 3, baseType: !17, size: 128, align: 32, offset: 64, flags: DIFlagPublic, elements: !22, runtimeLang: DW_LANG_C_plus_plus_11, vtableHolder: !17, templateParams: !20, identifier: "MangledBase")
!22 = !{!23}
!23 = !MDDerivedType(tag: DW_TAG_inheritance, scope: !21, baseType: !17)
diff --git a/test/Assembler/invalid-mdcompileunit-null-file.ll b/test/Assembler/invalid-mdcompileunit-null-file.ll
new file mode 100644
index 0000000..613948f
--- /dev/null
+++ b/test/Assembler/invalid-mdcompileunit-null-file.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:27: error: 'file' cannot be null
+!0 = !MDCompileUnit(file: null)
diff --git a/test/Assembler/invalid-mdglobalvariable-empty-name.ll b/test/Assembler/invalid-mdglobalvariable-empty-name.ll
new file mode 100644
index 0000000..77a9f4d
--- /dev/null
+++ b/test/Assembler/invalid-mdglobalvariable-empty-name.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:30: error: 'name' cannot be empty
+!0 = !MDGlobalVariable(name: "")
diff --git a/test/Assembler/invalid-mdglobalvariable-missing-name.ll b/test/Assembler/invalid-mdglobalvariable-missing-name.ll
new file mode 100644
index 0000000..d57d71e
--- /dev/null
+++ b/test/Assembler/invalid-mdglobalvariable-missing-name.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:24: error: missing required field 'name'
+!0 = !MDGlobalVariable()
diff --git a/test/Assembler/invalid-mdimportedentity-missing-parent.ll b/test/Assembler/invalid-mdimportedentity-missing-scope.ll
index 710a027..710a027 100644
--- a/test/Assembler/invalid-mdimportedentity-missing-parent.ll
+++ b/test/Assembler/invalid-mdimportedentity-missing-scope.ll
diff --git a/test/Assembler/invalid-mdlexicalblock-missing-parent.ll b/test/Assembler/invalid-mdlexicalblock-missing-scope.ll
index cdd12af..cdd12af 100644
--- a/test/Assembler/invalid-mdlexicalblock-missing-parent.ll
+++ b/test/Assembler/invalid-mdlexicalblock-missing-scope.ll
diff --git a/test/Assembler/invalid-mdlexicalblock-null-scope.ll b/test/Assembler/invalid-mdlexicalblock-null-scope.ll
new file mode 100644
index 0000000..59a9968
--- /dev/null
+++ b/test/Assembler/invalid-mdlexicalblock-null-scope.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:29: error: 'scope' cannot be null
+!0 = !MDLexicalBlock(scope: null)
diff --git a/test/Assembler/invalid-mdlexicalblockfile-missing-parent.ll b/test/Assembler/invalid-mdlexicalblockfile-missing-scope.ll
index 1c901e2..1c901e2 100644
--- a/test/Assembler/invalid-mdlexicalblockfile-missing-parent.ll
+++ b/test/Assembler/invalid-mdlexicalblockfile-missing-scope.ll
diff --git a/test/Assembler/invalid-mdlexicalblockfile-null-scope.ll b/test/Assembler/invalid-mdlexicalblockfile-null-scope.ll
new file mode 100644
index 0000000..708136c
--- /dev/null
+++ b/test/Assembler/invalid-mdlexicalblockfile-null-scope.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:33: error: 'scope' cannot be null
+!0 = !MDLexicalBlockFile(scope: null)
diff --git a/test/Assembler/invalid-mdlocalvariable-missing-name.ll b/test/Assembler/invalid-mdlocalvariable-missing-name.ll
deleted file mode 100644
index 5b23600..0000000
--- a/test/Assembler/invalid-mdlocalvariable-missing-name.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
-
-; CHECK: <stdin>:[[@LINE+1]]:29: error: missing required field 'tag'
-!0 = !MDLocalVariable(arg: 7)
diff --git a/test/Assembler/invalid-mdlocalvariable-missing-scope.ll b/test/Assembler/invalid-mdlocalvariable-missing-scope.ll
new file mode 100644
index 0000000..d8ee8a9
--- /dev/null
+++ b/test/Assembler/invalid-mdlocalvariable-missing-scope.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:48: error: missing required field 'scope'
+!0 = !MDLocalVariable(tag: DW_TAG_auto_variable)
diff --git a/test/Assembler/invalid-mdlocalvariable-missing-tag.ll b/test/Assembler/invalid-mdlocalvariable-missing-tag.ll
new file mode 100644
index 0000000..d3ca10a
--- /dev/null
+++ b/test/Assembler/invalid-mdlocalvariable-missing-tag.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:45: error: missing required field 'tag'
+!0 = !MDLocalVariable(scope: !MDSubprogram())
diff --git a/test/Assembler/invalid-mdlocalvariable-null-scope.ll b/test/Assembler/invalid-mdlocalvariable-null-scope.ll
new file mode 100644
index 0000000..53ee94d
--- /dev/null
+++ b/test/Assembler/invalid-mdlocalvariable-null-scope.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:30: error: 'scope' cannot be null
+!0 = !MDLocalVariable(scope: null)
diff --git a/test/Assembler/invalid-mdlocation-null-scope.ll b/test/Assembler/invalid-mdlocation-null-scope.ll
new file mode 100644
index 0000000..e359c9f
--- /dev/null
+++ b/test/Assembler/invalid-mdlocation-null-scope.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:25: error: 'scope' cannot be null
+!0 = !MDLocation(scope: null)
diff --git a/test/Assembler/mdglobalvariable.ll b/test/Assembler/mdglobalvariable.ll
index 09d20bf..2726f4f 100644
--- a/test/Assembler/mdglobalvariable.ll
+++ b/test/Assembler/mdglobalvariable.ll
@@ -3,20 +3,25 @@
@foo = global i32 0
-; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6}
-!named = !{!0, !1, !2, !3, !4, !5, !6}
+; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
+!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
-!0 = distinct !{}
+!0 = !MDFile(filename: "scope.h", directory: "/path/to/dir")
!1 = distinct !{}
!2 = !MDFile(filename: "path/to/file", directory: "/path/to/dir")
-!3 = distinct !{}
+!3 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!4 = distinct !{}
-; CHECK: !5 = !MDGlobalVariable(name: "foo", linkageName: "foo", scope: !0, file: !2, line: 7, type: !3, isLocal: true, isDefinition: false, variable: i32* @foo, declaration: !4)
+; CHECK: !5 = !MDGlobalVariable(name: "foo", linkageName: "foo", scope: !0, file: !2, line: 7, type: !3, isLocal: true, isDefinition: false, variable: i32* @foo)
!5 = !MDGlobalVariable(name: "foo", linkageName: "foo", scope: !0,
file: !2, line: 7, type: !3, isLocal: true,
- isDefinition: false, variable: i32* @foo,
- declaration: !4)
+ isDefinition: false, variable: i32* @foo)
-; CHECK: !6 = !MDGlobalVariable(scope: null, isLocal: false, isDefinition: true)
-!6 = !MDGlobalVariable()
+; CHECK: !6 = !MDGlobalVariable(name: "foo", scope: !0, isLocal: false, isDefinition: true)
+!6 = !MDGlobalVariable(name: "foo", scope: !0)
+
+!7 = !MDCompositeType(tag: DW_TAG_structure_type, name: "Class", size: 8, align: 8)
+!8 = !MDDerivedType(tag: DW_TAG_member, name: "mem", flags: DIFlagStaticMember, scope: !7, baseType: !3)
+
+; CHECK: !9 = !MDGlobalVariable(name: "mem", scope: !0, isLocal: false, isDefinition: true, declaration: !8)
+!9 = !MDGlobalVariable(name: "mem", scope: !0, declaration: !8)
diff --git a/test/Assembler/mdimportedentity.ll b/test/Assembler/mdimportedentity.ll
index 0584edb..8e98c5d 100644
--- a/test/Assembler/mdimportedentity.ll
+++ b/test/Assembler/mdimportedentity.ll
@@ -4,10 +4,10 @@
; CHECK: !named = !{!0, !1, !2, !3, !3}
!named = !{!0, !1, !2, !3, !4}
-; CHECK: !0 = distinct !{}
-; CHECK-NEXT: !1 = distinct !{}
-!0 = distinct !{}
-!1 = distinct !{}
+; CHECK: !0 = !MDSubprogram({{.*}})
+; CHECK-NEXT: !1 = !MDCompositeType({{.*}})
+!0 = !MDSubprogram(name: "foo")
+!1 = !MDCompositeType(tag: DW_TAG_structure_type, name: "Class", size: 32, align: 32)
; CHECK-NEXT: !2 = !MDImportedEntity(tag: DW_TAG_imported_module, name: "foo", scope: !0, entity: !1, line: 7)
!2 = !MDImportedEntity(tag: DW_TAG_imported_module, name: "foo", scope: !0,
diff --git a/test/Assembler/mdlexicalblock.ll b/test/Assembler/mdlexicalblock.ll
index 0a2c339..d88eae7 100644
--- a/test/Assembler/mdlexicalblock.ll
+++ b/test/Assembler/mdlexicalblock.ll
@@ -5,15 +5,15 @@
!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
!0 = distinct !{}
-!1 = distinct !{}
+!1 = !MDSubprogram(name: "foo", scope: !2)
!2 = !MDFile(filename: "path/to/file", directory: "/path/to/dir")
-; CHECK: !3 = !MDLexicalBlock(scope: !0, file: !2, line: 7, column: 35)
-!3 = !MDLexicalBlock(scope: !0, file: !2, line: 7, column: 35)
+; CHECK: !3 = !MDLexicalBlock(scope: !1, file: !2, line: 7, column: 35)
+!3 = !MDLexicalBlock(scope: !1, file: !2, line: 7, column: 35)
-; CHECK: !4 = !MDLexicalBlock(scope: !0)
-!4 = !MDLexicalBlock(scope: !0)
-!5 = !MDLexicalBlock(scope: !0, file: null, line: 0, column: 0)
+; CHECK: !4 = !MDLexicalBlock(scope: !1)
+!4 = !MDLexicalBlock(scope: !1)
+!5 = !MDLexicalBlock(scope: !1, file: null, line: 0, column: 0)
; CHECK: !5 = !MDLexicalBlockFile(scope: !3, file: !2, discriminator: 0)
; CHECK: !6 = !MDLexicalBlockFile(scope: !3, file: !2, discriminator: 1)
diff --git a/test/Assembler/mdlocalvariable.ll b/test/Assembler/mdlocalvariable.ll
index 3a36c7d..2c0b35a 100644
--- a/test/Assembler/mdlocalvariable.ll
+++ b/test/Assembler/mdlocalvariable.ll
@@ -6,21 +6,21 @@
; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
-!0 = distinct !{}
+!0 = distinct !MDSubprogram()
!1 = distinct !{}
!2 = !MDFile(filename: "path/to/file", directory: "/path/to/dir")
-!3 = distinct !{}
-!4 = distinct !{}
+!3 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!4 = !MDLocation(scope: !0)
-; CHECK: !5 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "foo", arg: 3, scope: !0, file: !2, line: 7, type: !3, flags: DIFlagArtificial, inlinedAt: !4)
-; CHECK: !6 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "foo", scope: !0, file: !2, line: 7, type: !3, flags: DIFlagArtificial, inlinedAt: !4)
+; CHECK: !5 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "foo", arg: 3, scope: !0, file: !2, line: 7, type: !3, flags: DIFlagArtificial)
+; CHECK: !6 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "foo", scope: !0, file: !2, line: 7, type: !3, flags: DIFlagArtificial)
!5 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "foo", arg: 3,
scope: !0, file: !2, line: 7, type: !3,
- flags: DIFlagArtificial, inlinedAt: !4)
+ flags: DIFlagArtificial)
!6 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "foo", scope: !0,
- file: !2, line: 7, type: !3, flags: DIFlagArtificial, inlinedAt: !4)
+ file: !2, line: 7, type: !3, flags: DIFlagArtificial)
-; CHECK: !7 = !MDLocalVariable(tag: DW_TAG_arg_variable, arg: 0, scope: null)
-; CHECK: !8 = !MDLocalVariable(tag: DW_TAG_auto_variable, scope: null)
-!7 = !MDLocalVariable(tag: DW_TAG_arg_variable)
-!8 = !MDLocalVariable(tag: DW_TAG_auto_variable)
+; CHECK: !7 = !MDLocalVariable(tag: DW_TAG_arg_variable, arg: 0, scope: !0)
+; CHECK: !8 = !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !0)
+!7 = !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !0)
+!8 = !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !0)
diff --git a/test/Assembler/mdnamespace.ll b/test/Assembler/mdnamespace.ll
index 90cb15f..e708a3c 100644
--- a/test/Assembler/mdnamespace.ll
+++ b/test/Assembler/mdnamespace.ll
@@ -4,7 +4,7 @@
; CHECK: !named = !{!0, !1, !2, !3, !4, !4}
!named = !{!0, !1, !2, !3, !4, !5}
-!0 = distinct !{}
+!0 = !MDFile(filename: "file.cpp", directory: "/path/to/dir")
!1 = distinct !{}
!2 = !MDFile(filename: "path/to/file", directory: "/path/to/dir")
diff --git a/test/Assembler/mdobjcproperty.ll b/test/Assembler/mdobjcproperty.ll
index cb2f4d3..a3b20e5 100644
--- a/test/Assembler/mdobjcproperty.ll
+++ b/test/Assembler/mdobjcproperty.ll
@@ -6,10 +6,9 @@
!0 = distinct !{}
!1 = !MDFile(filename: "path/to/file", directory: "/path/to/dir")
-!2 = distinct !{}
+!2 = !MDCompositeType(tag: DW_TAG_structure_type, name: "Object")
-
-; CHECK: !2 = distinct !{}
+; CHECK: !2 = !MDCompositeType({{.*}})
; CHECK-NEXT: !3 = !MDObjCProperty(name: "foo", file: !1, line: 7, setter: "setFoo", getter: "getFoo", attributes: 7, type: !2)
!3 = !MDObjCProperty(name: "foo", file: !1, line: 7, setter: "setFoo",
getter: "getFoo", attributes: 7, type: !2)
diff --git a/test/Assembler/mdsubprogram.ll b/test/Assembler/mdsubprogram.ll
index 7447166..9300089 100644
--- a/test/Assembler/mdsubprogram.ll
+++ b/test/Assembler/mdsubprogram.ll
@@ -6,17 +6,17 @@ declare void @_Z3foov()
; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
-!0 = distinct !{}
-!1 = distinct !{}
+!0 = !{null}
+!1 = distinct !MDCompositeType(tag: DW_TAG_structure_type)
!2 = !MDFile(filename: "path/to/file", directory: "/path/to/dir")
-!3 = distinct !{}
-!4 = distinct !{}
+!3 = !MDSubroutineType(types: !0)
+!4 = distinct !MDCompositeType(tag: DW_TAG_structure_type)
!5 = distinct !{}
-!6 = distinct !{}
+!6 = distinct !MDSubprogram(isDefinition: false)
!7 = distinct !{}
-; CHECK: !8 = !MDSubprogram(name: "foo", linkageName: "_Zfoov", scope: !0, file: !2, line: 7, type: !3, isLocal: true, isDefinition: false, scopeLine: 8, containingType: !4, virtuality: DW_VIRTUALITY_pure_virtual, virtualIndex: 10, flags: DIFlagPrototyped, isOptimized: true, function: void ()* @_Z3foov, templateParams: !5, declaration: !6, variables: !7)
-!8 = !MDSubprogram(name: "foo", linkageName: "_Zfoov", scope: !0,
+; CHECK: !8 = !MDSubprogram(name: "foo", linkageName: "_Zfoov", scope: !1, file: !2, line: 7, type: !3, isLocal: true, isDefinition: false, scopeLine: 8, containingType: !4, virtuality: DW_VIRTUALITY_pure_virtual, virtualIndex: 10, flags: DIFlagPrototyped, isOptimized: true, function: void ()* @_Z3foov, templateParams: !5, declaration: !6, variables: !7)
+!8 = !MDSubprogram(name: "foo", linkageName: "_Zfoov", scope: !1,
file: !2, line: 7, type: !3, isLocal: true,
isDefinition: false, scopeLine: 8, containingType: !4,
virtuality: DW_VIRTUALITY_pure_virtual, virtualIndex: 10,
diff --git a/test/Assembler/mdsubroutinetype.ll b/test/Assembler/mdsubroutinetype.ll
new file mode 100644
index 0000000..936cbf4
--- /dev/null
+++ b/test/Assembler/mdsubroutinetype.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
+
+; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
+!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
+
+!0 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!1 = !{null}
+!2 = !{null, !0}
+!3 = !{!0, !0, !0}
+
+
+; CHECK: !4 = !MDSubroutineType(types: !1)
+; CHECK: !5 = !MDSubroutineType(types: !2)
+; CHECK: !6 = !MDSubroutineType(types: !3)
+; CHECK: !7 = !MDSubroutineType(flags: DIFlagLValueReference, types: !3)
+!4 = !MDSubroutineType(types: !1)
+!5 = !MDSubroutineType(types: !2)
+!6 = !MDSubroutineType(types: !3)
+!7 = !MDSubroutineType(flags: DIFlagLValueReference, types: !3)
+
+; CHECK: !8 = !MDSubroutineType(types: null)
+!8 = !MDSubroutineType(types: null)
diff --git a/test/Assembler/mdtemplateparameter.ll b/test/Assembler/mdtemplateparameter.ll
index 22ee5c8..5bcef3f 100644
--- a/test/Assembler/mdtemplateparameter.ll
+++ b/test/Assembler/mdtemplateparameter.ll
@@ -5,8 +5,8 @@
!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
!0 = distinct !{}
-!1 = distinct !{}
-; CHECK: !1 = distinct !{}
+!1 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+; CHECK: !1 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
; CHECK-NEXT: !2 = !MDTemplateTypeParameter(name: "Ty", type: !1)
; CHECK-NEXT: !3 = !MDTemplateTypeParameter(type: !1)
diff --git a/test/Assembler/metadata-null-operands.ll b/test/Assembler/metadata-null-operands.ll
index acae1d4..7e27eba 100644
--- a/test/Assembler/metadata-null-operands.ll
+++ b/test/Assembler/metadata-null-operands.ll
@@ -1,13 +1,11 @@
; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
; RUN: verify-uselistorder %s
-; Don't crash on null operands. (If/when we add a verify check for these, we
-; should disable the verifier for this test and remove this comment; the test
-; is still important.)
-!named = !{!0, !1}
+; Don't crash on null operands. When we add a verify check for this, also
+; require non-null in the assembler and rework this test to check for that a la
+; test/Assembler/invalid-mdcompileunit-null-file.ll.
+!named = !{!0}
!0 = !MDDerivedType(tag: DW_TAG_pointer_type, baseType: null)
-!1 = !MDCompileUnit(language: DW_LANG_C, file: null)
-; CHECK: !named = !{!0, !1}
+; CHECK: !named = !{!0}
; CHECK: !0 = !MDDerivedType({{.*}}baseType: null{{.*}})
-; CHECK: !1 = !MDCompileUnit({{.*}}file: null{{.*}})
diff --git a/test/Assembler/musttail-invalid-1.ll b/test/Assembler/musttail-invalid-1.ll
index b123a91..e9141c8 100644
--- a/test/Assembler/musttail-invalid-1.ll
+++ b/test/Assembler/musttail-invalid-1.ll
@@ -8,7 +8,7 @@
declare i8* @f(i8*, ...)
define i8* @f_thunk(i8* %this) {
- %rv = musttail call i8* (i8*, ...)* @f(i8* %this, ...)
+ %rv = musttail call i8* (i8*, ...) @f(i8* %this, ...)
; CHECK: error: unexpected ellipsis in argument list for musttail call in non-varargs function
ret i8* %rv
}
diff --git a/test/Assembler/musttail-invalid-2.ll b/test/Assembler/musttail-invalid-2.ll
index 3bcb51f..8602afd 100644
--- a/test/Assembler/musttail-invalid-2.ll
+++ b/test/Assembler/musttail-invalid-2.ll
@@ -7,7 +7,7 @@
declare i8* @f(i8*, ...)
define i8* @f_thunk(i8* %this, ...) {
- %rv = musttail call i8* (i8*, ...)* @f(i8* %this)
+ %rv = musttail call i8* (i8*, ...) @f(i8* %this)
; CHECK: error: expected '...' at end of argument list for musttail call in varargs function
ret i8* %rv
}
diff --git a/test/Assembler/musttail.ll b/test/Assembler/musttail.ll
index 6e2a9b2..ac60ba2 100644
--- a/test/Assembler/musttail.ll
+++ b/test/Assembler/musttail.ll
@@ -7,8 +7,8 @@
declare i8* @f(i8*, ...)
define i8* @f_thunk(i8* %this, ...) {
- %rv = musttail call i8* (i8*, ...)* @f(i8* %this, ...)
+ %rv = musttail call i8* (i8*, ...) @f(i8* %this, ...)
ret i8* %rv
}
; CHECK-LABEL: define i8* @f_thunk(i8* %this, ...)
-; CHECK: %rv = musttail call i8* (i8*, ...)* @f(i8* %this, ...)
+; CHECK: %rv = musttail call i8* (i8*, ...) @f(i8* %this, ...)
diff --git a/test/Bitcode/Inputs/invalid-no-proper-module.bc b/test/Bitcode/Inputs/invalid-no-proper-module.bc
new file mode 100644
index 0000000..6d5a291
--- /dev/null
+++ b/test/Bitcode/Inputs/invalid-no-proper-module.bc
Binary files differ
diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll
index c75ee80..9fbdfeb 100644
--- a/test/Bitcode/attributes.ll
+++ b/test/Bitcode/attributes.ll
@@ -245,6 +245,12 @@ define void @f41(i8* align 32, double* align 64) {
ret void
}
+; CHECK: define dereferenceable_or_null(8) i8* @f42(i8* dereferenceable_or_null(8) %foo)
+define dereferenceable_or_null(8) i8* @f42(i8* dereferenceable_or_null(8) %foo) {
+ entry:
+ ret i8* %foo
+}
+
; CHECK: attributes #0 = { noreturn }
; CHECK: attributes #1 = { nounwind }
; CHECK: attributes #2 = { readnone }
diff --git a/test/Bitcode/invalid.test b/test/Bitcode/invalid.test
index 0eacb9d..9cab227 100644
--- a/test/Bitcode/invalid.test
+++ b/test/Bitcode/invalid.test
@@ -50,3 +50,8 @@ EXTRACT-IDXS: EXTRACTVAL: Invalid type
INSERT-ARRAY: INSERTVAL: Invalid array index
INSERT-STRUCT: INSERTVAL: Invalid struct index
INSERT-IDXS: INSERTVAL: Invalid type
+
+RUN: not llvm-dis -disable-output %p/Inputs/invalid-no-proper-module.bc 2>&1 | \
+RUN: FileCheck --check-prefix=NO-MODULE %s
+
+NO-MODULE: Malformed IR file
diff --git a/test/Bitcode/miscInstructions.3.2.ll b/test/Bitcode/miscInstructions.3.2.ll
index 6a077d5..bed26c2 100644
--- a/test/Bitcode/miscInstructions.3.2.ll
+++ b/test/Bitcode/miscInstructions.3.2.ll
@@ -173,8 +173,8 @@ entry:
; CHECK-NEXT: %res2 = tail call i32 @test(i32 %x)
%res2 = tail call i32 @test(i32 %x)
-; CHECK-NEXT: %res3 = call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42)
- %res3 = call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42)
+; CHECK-NEXT: %res3 = call i32 (i8*, ...) @printf(i8* %msg, i32 12, i8 42)
+ %res3 = call i32 (i8*, ...) @printf(i8* %msg, i32 12, i8 42)
ret void
}
diff --git a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
new file mode 100644
index 0000000..a31c66b
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -0,0 +1,491 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test aims to check basic correctness of frame layout &
+; frame access code. There are 8 functions in this test file,
+; each function implements one element in the cartesian product
+; of:
+; . a function having a VLA/noVLA
+; . a function with dynamic stack realignment/no dynamic stack realignment.
+; . a function needing a frame pointer/no frame pointer,
+; since the presence/absence of these influences the frame
+; layout and which pointer to use to access various parts of the
+; frame (bp, sp, fp).
+;
+; Furthermore: in every test function:
+; . there is always one integer and one floating point argument to be able
+;   to check those are accessed correctly.
+; . there is always one local variable to check that it is accessed
+;   correctly.
+;
+; The LLVM-IR below was produced by clang on the following C++ code:
+;extern "C" int g();
+;extern "C" int novla_nodynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; return i10 + (int)d10 + l1 + g();
+;}
+;extern "C" int novla_nodynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; return i10 + (int)d10 + l1;
+;}
+;extern "C" int novla_dynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; return i10 + (int)d10 + l1 + g();
+;}
+;extern "C" int novla_dynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; return i10 + (int)d10 + l1;
+;}
+;
+;extern "C" int vla_nodynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + g() + vla[0];
+;}
+;extern "C" int vla_nodynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + vla[0];
+;}
+;extern "C" int vla_dynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + g() + vla[0];
+;}
+;extern "C" int vla_dynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + vla[0];
+;}
+
+
+
+define i32 @novla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+ %l1 = alloca i32, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %call = tail call i32 @g()
+ %add2 = add nsw i32 %add1, %call
+ ret i32 %add2
+}
+; CHECK-LABEL: novla_nodynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; Check correct access to local variable on the stack, through stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
+; Check epilogue:
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+declare i32 @g() #0
+
+; Function Attrs: nounwind
+define i32 @novla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ ret i32 %add1
+}
+; CHECK-LABEL: novla_nodynamicrealign_nocall
+; Check that space is reserved for one local variable on the stack.
+; CHECK: sub sp, sp, #16 // =16
+; Check correct access to arguments passed on the stack, through stack pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [sp, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [sp, #24]
+; Check correct access to local variable on the stack, through stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
+; Check epilogue:
+; CHECK: add sp, sp, #16 // =16
+; CHECK: ret
+
+
+define i32 @novla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+ %l1 = alloca i32, align 128
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %call = tail call i32 @g()
+ %add2 = add nsw i32 %add1, %call
+ ret i32 %add2
+}
+
+; CHECK-LABEL: novla_dynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK: sub x9, sp, #96
+; CHECK: and sp, x9, #0xffffffffffffff80
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: sub sp, x29, #16 // =16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 128
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ ret i32 %add1
+}
+
+; CHECK-LABEL: novla_dynamicrealign_nocall
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK: sub x9, sp, #112
+; CHECK: and sp, x9, #0xffffffffffffff80
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: mov sp, x29
+; CHECK: ldp x29, x30, [sp], #16
+; CHECK: ret
+
+
+define i32 @vla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+ %l1 = alloca i32, align 4
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %call = tail call i32 @g()
+ %add2 = add nsw i32 %add1, %call
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add3 = add nsw i32 %add2, %1
+ ret i32 %add3
+}
+
+; CHECK-LABEL: vla_nodynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check that space is reserved on the stack for the local variable,
+; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
+; CHECK: sub sp, sp, #16
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through frame pointer
+; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-20]
+; Check correct accessing of the VLA variable through the base pointer
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: sub sp, x29, #16 // =16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @vla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 4
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add2 = add nsw i32 %add1, %1
+ ret i32 %add2
+}
+
+; CHECK-LABEL: vla_nodynamicrealign_nocall
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; Check that space is reserved on the stack for the local variable,
+; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
+; CHECK: sub sp, sp, #16
+; Check correctness of cfi pseudo-instructions
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through frame pointer
+; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-4]
+; Check correct accessing of the VLA variable through the base pointer
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: mov sp, x29
+; CHECK: ldp x29, x30, [sp], #16
+; CHECK: ret
+
+
+define i32 @vla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+ %l1 = alloca i32, align 128
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %call = tail call i32 @g()
+ %add2 = add nsw i32 %add1, %call
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add3 = add nsw i32 %add2, %1
+ ret i32 %add3
+}
+
+; CHECK-LABEL: vla_dynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x22, x21, [sp, #-48]!
+; CHECK: stp x20, x19, [sp, #16]
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #32]
+; CHECK: add x29, sp, #32
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK: sub x9, sp, #80 // =80
+; CHECK: and sp, x9, #0xffffffffffffff80
+; CHECK: mov x19, sp
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w21, -40
+; CHECK: .cfi_offset w22, -48
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: sub sp, x29, #32
+; CHECK: ldp x29, x30, [sp, #32]
+; CHECK: ldp x20, x19, [sp, #16]
+; CHECK: ldp x22, x21, [sp], #48
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 128
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add2 = add nsw i32 %add1, %1
+ ret i32 %add2
+}
+
+; CHECK-LABEL: vla_dynamicrealign_nocall
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK: sub x9, sp, #96
+; CHECK: and sp, x9, #0xffffffffffffff80
+; CHECK: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+
+
+; Function Attrs: nounwind
+define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 32768
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 32768
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add2 = add nsw i32 %add1, %1
+ ret i32 %add2
+}
+
+; CHECK-LABEL: vla_dynamicrealign_nocall_large_align
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 32768
+; bytes & the base pointer (x19) gets initialized to
+; this 32768-byte aligned area for local variables &
+; spill slots
+; CHECK: sub x9, sp, #7, lsl #12
+; CHECK: and sp, x9, #0xffffffffffff8000
+; CHECK: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll
index 09b9f62..d6350a6 100644
--- a/test/CodeGen/AArch64/addsub.ll
+++ b/test/CodeGen/AArch64/addsub.ll
@@ -24,6 +24,34 @@ define void @add_small() {
ret void
}
+; Make sure we grab the imm variant when the register operand
+; can be implicitly zero-extended.
+; We used to generate something horrible like this:
+; wA = ldrb
+; xB = ldimm 12
+; xC = add xB, wA, uxtb
+; whereas this can be achieved with:
+; wA = ldrb
+; xC = add xA, #12 ; <- xA implicitly zero-extends wA.
+define void @add_small_imm(i8* %p, i64* %q, i32 %b, i32* %addr) {
+; CHECK-LABEL: add_small_imm:
+entry:
+
+; CHECK: ldrb w[[LOAD32:[0-9]+]], [x0]
+ %t = load i8, i8* %p
+ %promoted = zext i8 %t to i64
+ %zextt = zext i8 %t to i32
+ %add = add nuw i32 %zextt, %b
+
+; CHECK: add [[ADD2:x[0-9]+]], x[[LOAD32]], #12
+ %add2 = add nuw i64 %promoted, 12
+ store i32 %add, i32* %addr
+
+; CHECK: str [[ADD2]], [x1]
+ store i64 %add2, i64* %q
+ ret void
+}
+
; Add 12-bit immediates, shifted left by 12 bits
define void @add_med() {
; CHECK-LABEL: add_med:
diff --git a/test/CodeGen/AArch64/argument-blocks.ll b/test/CodeGen/AArch64/argument-blocks.ll
index f1dcfa6..3169abc 100644
--- a/test/CodeGen/AArch64/argument-blocks.ll
+++ b/test/CodeGen/AArch64/argument-blocks.ll
@@ -64,7 +64,7 @@ define void @test_varargs_stackalign() {
; CHECK-LABEL: test_varargs_stackalign:
; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]
- call void(...)* @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
+ call void(...) @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
index 41e22e9..b760261 100644
--- a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
+++ b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
@@ -16,11 +16,11 @@ entry:
%0 = load double, double* %d.addr, align 8
%1 = load double, double* %d.addr, align 8
%conv = fptoui double %1 to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), double %0, i64 %conv)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), double %0, i64 %conv)
%2 = load double, double* %d.addr, align 8
%3 = load double, double* %d.addr, align 8
%conv1 = fptoui double %3 to i32
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str1, i32 0, i32 0), double %2, i32 %conv1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str1, i32 0, i32 0), double %2, i32 %conv1)
ret void
}
@@ -37,12 +37,12 @@ entry:
%conv = fpext float %0 to double
%1 = load float, float* %f.addr, align 4
%conv1 = fptoui float %1 to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str2, i32 0, i32 0), double %conv, i64 %conv1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str2, i32 0, i32 0), double %conv, i64 %conv1)
%2 = load float, float* %f.addr, align 4
%conv2 = fpext float %2 to double
%3 = load float, float* %f.addr, align 4
%conv3 = fptoui float %3 to i32
- %call4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str3, i32 0, i32 0), double %conv2, i32 %conv3)
+ %call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str3, i32 0, i32 0), double %conv2, i32 %conv3)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
index 6266d1c..8784abd 100644
--- a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
+++ b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
@@ -7,13 +7,13 @@ define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: bar:
; CHECK: add.2d v[[REG:[0-9]+]], v0, v1
; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1
+; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
; Without advanced copy optimization, we end up with cross-register-bank
; copies that cannot be coalesced.
; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; With advanced copy optimization, we end up with just one copy
; to insert the computed high part into the V register.
; CHECK-OPT-NOT: fmov
-; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
; CHECK-OPT-NOT: fmov
@@ -23,9 +23,9 @@ define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; GENERIC-LABEL: bar:
; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d
; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1
+; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; GENERIC-OPT-NOT: fmov
-; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
; GENERIC-OPT-NOT: fmov
diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll
index 41c3ad5..390a3c7 100644
--- a/test/CodeGen/AArch64/arm64-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-aapcs.ll
@@ -78,7 +78,7 @@ declare void @variadic(i32 %a, ...)
; Under AAPCS variadic functions have the same calling convention as
; others. The extra arguments should go in registers rather than on the stack.
define void @test_variadic() {
- call void(i32, ...)* @variadic(i32 0, i64 1, double 2.0)
+ call void(i32, ...) @variadic(i32 0, i64 1, double 2.0)
; CHECK: fmov d0, #2.0
; CHECK: orr w1, wzr, #0x1
; CHECK: bl variadic
diff --git a/test/CodeGen/AArch64/arm64-abi-varargs.ll b/test/CodeGen/AArch64/arm64-abi-varargs.ll
index f95fec6..03414b5 100644
--- a/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -94,7 +94,7 @@ define i32 @main() nounwind ssp {
%10 = load i32, i32* %a10, align 4
%11 = load i32, i32* %a11, align 4
%12 = load i32, i32* %a12, align 4
- call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...)* @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
+ call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...) @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
ret i32 0
}
@@ -133,7 +133,7 @@ entry:
store <4 x i32> %y, <4 x i32>* %y.addr, align 16
%0 = load i32, i32* %x.addr, align 4
%1 = load <4 x i32>, <4 x i32>* %y.addr, align 16
- call void (i8*, ...)* @foo(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %0, <4 x i32> %1)
+ call void (i8*, ...) @foo(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %0, <4 x i32> %1)
ret void
}
@@ -186,6 +186,6 @@ entry:
%1 = load i32, i32* %x.addr, align 4
%2 = bitcast %struct.s41* %s41 to i128*
%3 = load i128, i128* %2, align 1
- call void (i8*, ...)* @foo2(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %1, i128 %3)
+ call void (i8*, ...) @foo2(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %1, i128 %3)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-anyregcc-crash.ll b/test/CodeGen/AArch64/arm64-anyregcc-crash.ll
index 241cf97..56c62d5 100644
--- a/test/CodeGen/AArch64/arm64-anyregcc-crash.ll
+++ b/test/CodeGen/AArch64/arm64-anyregcc-crash.ll
@@ -8,7 +8,7 @@ define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i6
i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32) {
entry:
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32,
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32,
i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16,
i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
diff --git a/test/CodeGen/AArch64/arm64-anyregcc.ll b/test/CodeGen/AArch64/arm64-anyregcc.ll
index e26875d..2a2f451 100644
--- a/test/CodeGen/AArch64/arm64-anyregcc.ll
+++ b/test/CodeGen/AArch64/arm64-anyregcc.ll
@@ -55,7 +55,7 @@
; CHECK-NEXT: .long 3
define i64 @test() nounwind ssp uwtable {
entry:
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 16, i8* null, i32 2, i32 1, i32 2, i64 3)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 0, i32 16, i8* null, i32 2, i32 1, i32 2, i64 3)
ret i64 0
}
@@ -77,7 +77,7 @@ entry:
define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 20, i8* %f, i32 1, i8* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 1, i32 20, i8* %f, i32 1, i8* %obj)
ret i64 %ret
}
@@ -100,7 +100,7 @@ define i64 @property_access2() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %f, i32 1, i64* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %f, i32 1, i64* %obj)
ret i64 %ret
}
@@ -123,7 +123,7 @@ define i64 @property_access3() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 20, i8* %f, i32 0, i64* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 3, i32 20, i8* %f, i32 0, i64* %obj)
ret i64 %ret
}
@@ -205,7 +205,7 @@ entry:
define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 20, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 4, i32 20, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
@@ -287,7 +287,7 @@ entry:
define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
@@ -315,7 +315,7 @@ entry:
; CHECK-NEXT: .long 0
define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 12, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
ret i64 %result
}
@@ -355,7 +355,7 @@ entry:
define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 13, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
ret i64 %result
}
diff --git a/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
index c280bef..d089767 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
@@ -1,6 +1,10 @@
; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s
; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -fast-isel=true -O0 -o - | FileCheck %s
+; Note: we split the functions into multiple BBs below to isolate the call
+; instruction we want to test from fast-isel failing to select instructions
+; after it.
+
; CHECK-LABEL: test_i64_f64:
declare i64 @test_i64_f64_helper(double %p)
define void @test_i64_f64(double* %p, i64* %q) {
@@ -8,6 +12,8 @@ define void @test_i64_f64(double* %p, i64* %q) {
%1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call i64 @test_i64_f64_helper(double %2)
+ br label %return_bb
+return_bb:
%4 = add i64 %3, %3
store i64 %4, i64* %q
ret void
@@ -20,6 +26,8 @@ define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
%1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add i64 %3, %3
store i64 %4, i64* %q
ret void
@@ -32,6 +40,8 @@ define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
%1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add i64 %3, %3
store i64 %4, i64* %q
ret void
@@ -44,6 +54,8 @@ define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
%1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add i64 %3, %3
store i64 %4, i64* %q
ret void
@@ -56,6 +68,8 @@ define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
%1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add i64 %3, %3
store i64 %4, i64* %q
ret void
@@ -68,6 +82,8 @@ define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
%1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add i64 %3, %3
store i64 %4, i64* %q
ret void
@@ -80,6 +96,8 @@ define void @test_f64_i64(i64* %p, double* %q) {
%1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call double @test_f64_i64_helper(i64 %2)
+ br label %return_bb
+return_bb:
%4 = fadd double %3, %3
store double %4, double* %q
ret void
@@ -92,6 +110,8 @@ define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
%1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = fadd double %3, %3
store double %4, double* %q
ret void
@@ -104,6 +124,8 @@ define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
%1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call double @test_f64_v2f32_helper(<2 x float> %2)
+ br label %return_bb
+return_bb:
%4 = fadd double %3, %3
store double %4, double* %q
ret void
@@ -116,6 +138,8 @@ define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
%1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = fadd double %3, %3
store double %4, double* %q
ret void
@@ -128,6 +152,8 @@ define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
%1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = fadd double %3, %3
store double %4, double* %q
ret void
@@ -140,6 +166,8 @@ define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
%1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = fadd double %3, %3
store double %4, double* %q
ret void
@@ -152,6 +180,8 @@ define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
%1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
+ br label %return_bb
+return_bb:
%4 = add <1 x i64> %3, %3
store <1 x i64> %4, <1 x i64>* %q
ret void
@@ -164,6 +194,8 @@ define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
%1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
+ br label %return_bb
+return_bb:
%4 = add <1 x i64> %3, %3
store <1 x i64> %4, <1 x i64>* %q
ret void
@@ -176,6 +208,8 @@ define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
%1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <1 x i64> %3, %3
store <1 x i64> %4, <1 x i64>* %q
ret void
@@ -188,6 +222,8 @@ define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
%1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add <1 x i64> %3, %3
store <1 x i64> %4, <1 x i64>* %q
ret void
@@ -200,6 +236,8 @@ define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
%1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add <1 x i64> %3, %3
store <1 x i64> %4, <1 x i64>* %q
ret void
@@ -212,6 +250,8 @@ define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
%1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add <1 x i64> %3, %3
store <1 x i64> %4, <1 x i64>* %q
ret void
@@ -224,6 +264,8 @@ define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
%1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x float> %3, %3
store <2 x float> %4, <2 x float>* %q
ret void
@@ -236,6 +278,8 @@ define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
%1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <2 x float> @test_v2f32_f64_helper(double %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x float> %3, %3
store <2 x float> %4, <2 x float>* %q
ret void
@@ -248,6 +292,8 @@ define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
%1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x float> %3, %3
store <2 x float> %4, <2 x float>* %q
ret void
@@ -261,6 +307,8 @@ define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
%1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x float> %3, %3
store <2 x float> %4, <2 x float>* %q
ret void
@@ -274,6 +322,8 @@ define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
%1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x float> %3, %3
store <2 x float> %4, <2 x float>* %q
ret void
@@ -287,6 +337,8 @@ define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
%1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x float> %3, %3
store <2 x float> %4, <2 x float>* %q
ret void
@@ -299,6 +351,8 @@ define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
%1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i32> %3, %3
store <2 x i32> %4, <2 x i32>* %q
ret void
@@ -311,6 +365,8 @@ define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
%1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i32> %3, %3
store <2 x i32> %4, <2 x i32>* %q
ret void
@@ -323,6 +379,8 @@ define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
%1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i32> %3, %3
store <2 x i32> %4, <2 x i32>* %q
ret void
@@ -336,6 +394,8 @@ define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
%1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i32> %3, %3
store <2 x i32> %4, <2 x i32>* %q
ret void
@@ -349,6 +409,8 @@ define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
%1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i32> %3, %3
store <2 x i32> %4, <2 x i32>* %q
ret void
@@ -362,6 +424,8 @@ define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
%1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i32> %3, %3
store <2 x i32> %4, <2 x i32>* %q
ret void
@@ -374,6 +438,8 @@ define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
%1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, <4 x i16>* %q
ret void
@@ -386,6 +452,8 @@ define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
%1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, <4 x i16>* %q
ret void
@@ -398,6 +466,8 @@ define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
%1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, <4 x i16>* %q
ret void
@@ -411,6 +481,8 @@ define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
%1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, <4 x i16>* %q
ret void
@@ -424,6 +496,8 @@ define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
%1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, <4 x i16>* %q
ret void
@@ -437,6 +511,8 @@ define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
%1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, <4 x i16>* %q
ret void
@@ -449,6 +525,8 @@ define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
%1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i8> %3, %3
store <8 x i8> %4, <8 x i8>* %q
ret void
@@ -461,6 +539,8 @@ define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
%1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i8> %3, %3
store <8 x i8> %4, <8 x i8>* %q
ret void
@@ -473,6 +553,8 @@ define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
%1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i8> %3, %3
store <8 x i8> %4, <8 x i8>* %q
ret void
@@ -486,6 +568,8 @@ define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
%1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i8> %3, %3
store <8 x i8> %4, <8 x i8>* %q
ret void
@@ -499,6 +583,8 @@ define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
%1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i8> %3, %3
store <8 x i8> %4, <8 x i8>* %q
ret void
@@ -512,6 +598,8 @@ define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
%1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i8> %3, %3
store <8 x i8> %4, <8 x i8>* %q
ret void
@@ -524,6 +612,8 @@ define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
%1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
+ br label %return_bb
+return_bb:
%4 = fadd fp128 %3, %3
store fp128 %4, fp128* %q
ret void
@@ -536,6 +626,8 @@ define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
%1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = fadd fp128 %3, %3
store fp128 %4, fp128* %q
ret void
@@ -549,6 +641,8 @@ define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
%1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
+ br label %return_bb
+return_bb:
%4 = fadd fp128 %3, %3
store fp128 %4, fp128* %q
ret void
@@ -562,6 +656,8 @@ define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
%1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = fadd fp128 %3, %3
store fp128 %4, fp128* %q
ret void
@@ -575,6 +671,8 @@ define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
%1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = fadd fp128 %3, %3
store fp128 %4, fp128* %q
ret void
@@ -588,6 +686,8 @@ define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
%1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = fadd fp128 %3, %3
store fp128 %4, fp128* %q
ret void
@@ -600,6 +700,8 @@ define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
%1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x double> %3, %3
store <2 x double> %4, <2 x double>* %q
ret void
@@ -613,6 +715,8 @@ define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
%1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x double> %3, %3
store <2 x double> %4, <2 x double>* %q
ret void
@@ -627,6 +731,8 @@ define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) {
%1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x double> %3, %3
store <2 x double> %4, <2 x double>* %q
ret void
@@ -641,6 +747,8 @@ define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) {
%1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x double> %3, %3
store <2 x double> %4, <2 x double>* %q
ret void
@@ -655,6 +763,8 @@ define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) {
%1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x double> %3, %3
store <2 x double> %4, <2 x double>* %q
ret void
@@ -669,6 +779,8 @@ define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
%1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x double> %3, %3
store <2 x double> %4, <2 x double>* %q
ret void
@@ -681,6 +793,8 @@ define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
%1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i64> %3, %3
store <2 x i64> %4, <2 x i64>* %q
ret void
@@ -694,6 +808,8 @@ define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
%1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i64> %3, %3
store <2 x i64> %4, <2 x i64>* %q
ret void
@@ -708,6 +824,8 @@ define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) {
%1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i64> %3, %3
store <2 x i64> %4, <2 x i64>* %q
ret void
@@ -722,6 +840,8 @@ define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) {
%1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i64> %3, %3
store <2 x i64> %4, <2 x i64>* %q
ret void
@@ -736,6 +856,8 @@ define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
%1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i64> %3, %3
store <2 x i64> %4, <2 x i64>* %q
ret void
@@ -750,6 +872,8 @@ define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
%1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i64> %3, %3
store <2 x i64> %4, <2 x i64>* %q
ret void
@@ -763,6 +887,8 @@ define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
%1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
+ br label %return_bb
+return_bb:
%4 = fadd <4 x float> %3, %3
store <4 x float> %4, <4 x float>* %q
ret void
@@ -777,6 +903,8 @@ define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) {
%1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <4 x float> %3, %3
store <4 x float> %4, <4 x float>* %q
ret void
@@ -791,6 +919,8 @@ define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) {
%1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <4 x float> %3, %3
store <4 x float> %4, <4 x float>* %q
ret void
@@ -806,6 +936,8 @@ define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) {
%1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <4 x float> %3, %3
store <4 x float> %4, <4 x float>* %q
ret void
@@ -821,6 +953,8 @@ define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) {
%1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <4 x float> %3, %3
store <4 x float> %4, <4 x float>* %q
ret void
@@ -836,6 +970,8 @@ define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
%1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <4 x float> %3, %3
store <4 x float> %4, <4 x float>* %q
ret void
@@ -849,6 +985,8 @@ define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
%1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i32> %3, %3
store <4 x i32> %4, <4 x i32>* %q
ret void
@@ -863,6 +1001,8 @@ define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) {
%1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i32> %3, %3
store <4 x i32> %4, <4 x i32>* %q
ret void
@@ -877,6 +1017,8 @@ define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) {
%1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i32> %3, %3
store <4 x i32> %4, <4 x i32>* %q
ret void
@@ -892,6 +1034,8 @@ define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) {
%1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i32> %3, %3
store <4 x i32> %4, <4 x i32>* %q
ret void
@@ -907,6 +1051,8 @@ define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) {
%1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i32> %3, %3
store <4 x i32> %4, <4 x i32>* %q
ret void
@@ -922,6 +1068,8 @@ define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
%1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i32> %3, %3
store <4 x i32> %4, <4 x i32>* %q
ret void
@@ -935,6 +1083,8 @@ define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
%1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i16> %3, %3
store <8 x i16> %4, <8 x i16>* %q
ret void
@@ -949,6 +1099,8 @@ define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) {
%1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i16> %3, %3
store <8 x i16> %4, <8 x i16>* %q
ret void
@@ -963,6 +1115,8 @@ define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) {
%1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i16> %3, %3
store <8 x i16> %4, <8 x i16>* %q
ret void
@@ -978,6 +1132,8 @@ define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) {
%1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i16> %3, %3
store <8 x i16> %4, <8 x i16>* %q
ret void
@@ -993,6 +1149,8 @@ define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) {
%1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i16> %3, %3
store <8 x i16> %4, <8 x i16>* %q
ret void
@@ -1008,6 +1166,8 @@ define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
%1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i16> %3, %3
store <8 x i16> %4, <8 x i16>* %q
ret void
@@ -1021,6 +1181,8 @@ define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
%1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
+ br label %return_bb
+return_bb:
%4 = add <16 x i8> %3, %3
store <16 x i8> %4, <16 x i8>* %q
ret void
@@ -1035,6 +1197,8 @@ define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) {
%1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
+ br label %return_bb
+return_bb:
%4 = add <16 x i8> %3, %3
store <16 x i8> %4, <16 x i8>* %q
ret void
@@ -1049,6 +1213,8 @@ define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
%1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add <16 x i8> %3, %3
store <16 x i8> %4, <16 x i8>* %q
ret void
@@ -1064,6 +1230,8 @@ define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
%1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <16 x i8> %3, %3
store <16 x i8> %4, <16 x i8>* %q
ret void
@@ -1079,6 +1247,8 @@ define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
%1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add <16 x i8> %3, %3
store <16 x i8> %4, <16 x i8>* %q
ret void
@@ -1094,6 +1264,8 @@ define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
%1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add <16 x i8> %3, %3
store <16 x i8> %4, <16 x i8>* %q
ret void
diff --git a/test/CodeGen/AArch64/arm64-call-tailcalls.ll b/test/CodeGen/AArch64/arm64-call-tailcalls.ll
index 71d9327..6621db2 100644
--- a/test/CodeGen/AArch64/arm64-call-tailcalls.ll
+++ b/test/CodeGen/AArch64/arm64-call-tailcalls.ll
@@ -53,9 +53,9 @@ bb: ; preds = %entry
define i32 @t8(i32 %x) nounwind ssp {
; CHECK-LABEL: t8:
+; CHECK: b _c
; CHECK: b _a
; CHECK: b _b
-; CHECK: b _c
%and = and i32 %x, 1
%tobool = icmp eq i32 %and, 0
br i1 %tobool, label %if.end, label %if.then
diff --git a/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll b/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
new file mode 100644
index 0000000..f0b8299
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
@@ -0,0 +1,638 @@
+; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
+; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
+; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE
+
+; CodeGenPrepare should move the zext into the block with the load
+; so that SelectionDAG can select it with the load.
+;
+; OPTALL-LABEL: @foo
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; OPTALL: store i32 [[ZEXT]], i32* %q
+; OPTALL: ret
+define void @foo(i8* %p, i32* %q) {
+entry:
+ %t = load i8, i8* %p
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i8 %t to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a zextload when an operation with only one
+; argument to explicitly extend is in the way.
+; OPTALL-LABEL: @promoteOneArg
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
+; Make sure the operation is not promoted when the promotion pass is disabled.
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteOneArg(i8* %p, i32* %q) {
+entry:
+ %t = load i8, i8* %p
+ %add = add nuw i8 %t, 2
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a sextload when an operation with only one
+; argument to explicitly extend is in the way.
+; Version with sext.
+; OPTALL-LABEL: @promoteOneArgSExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteOneArgSExt(i8* %p, i32* %q) {
+entry:
+ %t = load i8, i8* %p
+ %add = add nsw i8 %t, 2
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = sext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a zextload when an operation with two
+; arguments to explicitly extend is in the way.
+; Extending %add will create two extensions:
+; 1. One for %b.
+; 2. One for %t.
+; #1 will not be removed as we do not know anything about %b.
+; #2 may not be merged with the load because %t is used in a comparison.
+; Since two extensions may be emitted in the end, instead of the one present
+; before the transformation, the regular heuristic does not apply the optimization.
+;
+; OPTALL-LABEL: @promoteTwoArgZext
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
+;
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
+entry:
+ %t = load i8, i8* %p
+ %add = add nuw i8 %t, %b
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a sextload when an operation with two
+; arguments to explicitly extend is in the way.
+; Version with sext.
+; OPTALL-LABEL: @promoteTwoArgSExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
+; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
+;
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
+;
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
+entry:
+ %t = load i8, i8* %p
+ %add = add nsw i8 %t, %b
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = sext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we do not form a zextload if we need to introduce more than
+; one additional extension.
+; OPTALL-LABEL: @promoteThreeArgZext
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
+; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
+; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
+;
+; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
+; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; DISABLE: add nuw i8
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
+entry:
+ %t = load i8, i8* %p
+ %tmp = add nuw i8 %t, %b
+ %add = add nuw i8 %tmp, %c
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a zextload after promoting and merging
+; two extensions.
+; OPTALL-LABEL: @promoteMergeExtArgZExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
+;
+; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
+;
+; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
+;
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
+entry:
+ %t = load i8, i8* %p
+ %ext = zext i8 %t to i16
+ %add = add nuw i16 %ext, %b
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i16 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a sextload after promoting and merging
+; two extensions.
+; Version with sext.
+; OPTALL-LABEL: @promoteMergeExtArgSExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[ZEXTB]]
+;
+; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
+;
+; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
+entry:
+ %t = load i8, i8* %p
+ %ext = zext i8 %t to i16
+ %add = add nsw i16 %ext, %b
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = sext i16 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to catch all the extload opportunities that are exposed
+; by the different iterations of codegen prepare.
+; Moreover, check that we do not promote more than we need to.
+; Here is what is happening in this test (not necessarily in this order):
+; 1. We try to promote the operand of %sextadd.
+; a. This creates one sext of %ld2 and one of %zextld
+; b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
+; introduce one. This is fine with the current heuristic: neutral.
+; => We have one zext of %zextld left and we created one sext of %ld2.
+; 2. We try to promote the operand of %sextaddza.
+; a. This creates one sext of %zexta and one of %zextld
+; b. The sext of %zexta does not lead to any load; it stays here, even though it
+; could have been combined with the zext of %a.
+; c. The sext of %zextld leads to %ld and can be combined with it. This is
+; done by promoting %zextld. This is fine with the current heuristic:
+; neutral.
+; => We have created a new zext of %ld and we created one sext of %zexta.
+; 3. We try to promote the operand of %sextaddb.
+; a. This creates one sext of %b and one of %zextld
+; b. The sext of %b is a dead-end, nothing to be done.
+; c. Same thing as 2.c. happens.
+; => We have created a new zext of %ld and we created one sext of %b.
+; 4. We try to promote the operand of the zext of %zextld introduced in #1.
+; a. Same thing as 2.c. happens.
+; b. %zextld does not have any other uses. It is dead-code eliminated.
+; => We have created a new zext of %ld and we removed a zext of %zextld and
+; a zext of %ld.
+; Currently we do not try to reuse existing extensions, so in the end we have
+; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
+;
+; OPTALL-LABEL: @severalPromotions
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
+; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
+; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]]
+; We do not combine this one: see 2.b.
+; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i32
+; OPT-NEXT: [[SEXTZEXTA:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXTA]] to i64
+; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTZEXTA]], [[ZEXTLD1_3]]
+; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_2]]
+;
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
+; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
+; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDZA]] to i64
+; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
+; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDB]] to i64
+;
+; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
+; OPTALL: ret
+define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
+ %ld = load i8, i8* %addr1
+ %zextld = zext i8 %ld to i32
+ %ld2 = load i32, i32* %addr2
+ %add = add nsw i32 %ld2, %zextld
+ %sextadd = sext i32 %add to i64
+ %zexta = zext i8 %a to i32
+ %addza = add nsw i32 %zexta, %zextld
+ %sextaddza = sext i32 %addza to i64
+ %addb = add nsw i32 %b, %zextld
+ %sextaddb = sext i32 %addb to i64
+ call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
+ ret void
+}
+
+declare void @dummy(i64, i64, i64)
+
+; Make sure we do not try to promote vector types since the type promotion
+; helper does not support them for now.
+; OPTALL-LABEL: @vectorPromotion
+; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
+; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
+; OPTALL: ret
+define void @vectorPromotion() {
+entry:
+ %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
+ %b = zext <2 x i32> %a to <2 x i64>
+ ret void
+}
+
+@a = common global i32 0, align 4
+@c = common global [2 x i32] zeroinitializer, align 4
+
+; Make sure we support promotion of operands that produce a Value as opposed
+; to an instruction.
+; This used to cause a crash.
+; OPTALL-LABEL: @promotionOfArgEndsUpInValue
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
+;
+; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
+;
+; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
+; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
+;
+; OPTALL-NEXT: ret i32 [[RES]]
+define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
+entry:
+ %val = load i16, i16* %addr
+ %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
+ %conv3 = sext i16 %add to i32
+ ret i32 %conv3
+}
+
+; Check that we see that one zext can be derived from the other for free.
+; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
+; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
+; OPT-NEXT: store i32 [[RES32]], i32* %addr
+; OPT-NEXT: store i64 [[RES64]], i64* %q
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
+; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
+; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
+; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
+;
+; OPTALL-NEXT: ret void
+define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nuw i32 %zextt, %b
+ %add2 = add nuw i32 %zextt, 12
+ store i32 %add, i32 *%addr
+ %s = zext i32 %add2 to i64
+ store i64 %s, i64* %q
+ ret void
+}
+
+; Check that we do not increase the cost of the code.
+; The input has one free zext and one free sext. If we promoted all the way
+; through the load, we would end up with a free zext and a non-free sext
+; (of %b).
+; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
+; STRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
+;
+; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
+; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
+; OPTALL-NEXT: ret void
+define void @doNotPromoteFreeSExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nsw i32 %zextt, %b
+ %idx64 = sext i32 %add to i64
+ %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
+ store i32 %add, i32 *%staddr
+ ret void
+}
+
+; Check that we do not increase the cost of the code.
+; The input has one free zext and one free sext. If we promoted all the way
+; through the load, we would end up with a free zext and a non-free sext
+; (of %b).
+; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode64
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
+;
+; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i64, i64* %addr, i64 [[IDX64]]
+; OPTALL-NEXT: store i64 %stuff, i64* [[GEP]]
+; OPTALL-NEXT: ret void
+define void @doNotPromoteFreeSExtFromAddrMode64(i8* %p, i32 %b, i64* %addr, i64 %stuff) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nsw i32 %zextt, %b
+ %idx64 = sext i32 %add to i64
+ %staddr = getelementptr inbounds i64, i64* %addr, i64 %idx64
+ store i64 %stuff, i64 *%staddr
+ ret void
+}
+
+; Check that we do not increase the cost of the code.
+; The input has one free zext and one free sext. If we promoted all the way
+; through the load, we would end up with a free zext and a non-free sext
+; (of %b).
+; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
+;
+; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, i128* %addr, i64 [[IDX64]]
+; OPTALL-NEXT: store i128 %stuff, i128* [[GEP]]
+; OPTALL-NEXT: ret void
+define void @doNotPromoteFreeSExtFromAddrMode128(i8* %p, i32 %b, i128* %addr, i128 %stuff) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nsw i32 %zextt, %b
+ %idx64 = sext i32 %add to i64
+ %staddr = getelementptr inbounds i128, i128* %addr, i64 %idx64
+ store i128 %stuff, i128 *%staddr
+ ret void
+}
+
+
+; Check that we do not increase the cost of the code.
+; The input has one free zext and one free sext. If we promoted all the way
+; through the load, we would end up with a free zext and a non-free sext
+; (of %b).
+; OPTALL-LABEL: @promoteSExtFromAddrMode256
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i256, i256* %addr, i64 [[IDX64]]
+; OPTALL-NEXT: store i256 %stuff, i256* [[GEP]]
+; OPTALL-NEXT: ret void
+define void @promoteSExtFromAddrMode256(i8* %p, i32 %b, i256* %addr, i256 %stuff) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nsw i32 %zextt, %b
+ %idx64 = sext i32 %add to i64
+ %staddr = getelementptr inbounds i256, i256* %addr, i64 %idx64
+ store i256 %stuff, i256 *%staddr
+ ret void
+}
+
+; Check that we do not increase the cost of the code.
+; The input has two free zexts.
+; When we promote all the way through the load, we end up with
+; a free zext and a non-free zext (of %b).
+; However, the current target lowering says zext i32 to i64 is free,
+; so the promotion happens because the cost did not change and may
+; expose more opportunities.
+; This would need to be fixed at some point.
+; OPTALL-LABEL: @doNotPromoteFreeZExtFromAddrMode
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; This transformation should really happen only for stress mode.
+; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
+; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
+; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
+; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
+; OPTALL-NEXT: ret void
+define void @doNotPromoteFreeZExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nuw i32 %zextt, %b
+ %idx64 = zext i32 %add to i64
+ %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
+ store i32 %add, i32 *%staddr
+ ret void
+}
+
+; OPTALL-LABEL: @doNotPromoteFreeSExtFromShift
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
+;
+; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
+; OPTALL-NEXT: ret i64 %staddr
+define i64 @doNotPromoteFreeSExtFromShift(i8* %p, i32 %b) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nsw i32 %zextt, %b
+ %idx64 = sext i32 %add to i64
+ %staddr = shl i64 %idx64, 12
+ ret i64 %staddr
+}
+
+; Same comment as doNotPromoteFreeZExtFromAddrMode.
+; OPTALL-LABEL: @doNotPromoteFreeZExtFromShift
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; This transformation should really happen only for stress mode.
+; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
+; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
+; OPTALL-NEXT: ret i64 %staddr
+define i64 @doNotPromoteFreeZExtFromShift(i8* %p, i32 %b) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nuw i32 %zextt, %b
+ %idx64 = zext i32 %add to i64
+ %staddr = shl i64 %idx64, 12
+ ret i64 %staddr
+}
+
+; The input has one free zext and one non-free sext.
+; When we promote all the way through to the load, we end up with
+; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
+; However, when we generate the load pair, the free sext (%ld1) becomes
+; non-free. So technically, we trade one non-free sext for two non-free
+; sexts.
+; This would need to be fixed at some point.
+; OPTALL-LABEL: @doNotPromoteBecauseOfPairedLoad
+; OPTALL: [[LD0:%[a-zA-Z_0-9-]+]] = load i32, i32* %p
+; OPTALL: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %p, i64 1
+; OPTALL: [[LD1:%[a-zA-Z_0-9-]+]] = load i32, i32* [[GEP]]
+;
+; This transformation should really happen only for stress mode.
+; OPT-NEXT: [[SEXTLD1:%[a-zA-Z_0-9-]+]] = sext i32 [[LD1]] to i64
+; OPT-NEXT: [[SEXTCST:%[a-zA-Z_0-9-]+]] = sext i32 %cst to i64
+; OPT-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD1]], [[SEXTCST]]
+;
+; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[LD1]], %cst
+; DISABLE-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = sext i32 [[RES]] to i64
+;
+; OPTALL-NEXT: [[ZEXTLD0:%[a-zA-Z_0-9-]+]] = zext i32 [[LD0]] to i64
+; OPTALL-NEXT: [[FINAL:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
+; OPTALL-NEXT: ret i64 [[FINAL]]
+define i64 @doNotPromoteBecauseOfPairedLoad(i32* %p, i32 %cst) {
+ %ld0 = load i32, i32* %p
+ %idxLd1 = getelementptr inbounds i32, i32* %p, i64 1
+ %ld1 = load i32, i32* %idxLd1
+ %res = add nsw i32 %ld1, %cst
+ %sextres = sext i32 %res to i64
+ %zextLd0 = zext i32 %ld0 to i64
+ %final = add i64 %sextres, %zextLd0
+ ret i64 %final
+}
diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
index c6b7d83..c4e3e4e 100644
--- a/test/CodeGen/AArch64/arm64-convert-v4f64.ll
+++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -3,11 +3,11 @@
define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
; CHECK: fptosi_v4f64_to_v4i16
-; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d
-; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d
-; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
-; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
-; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
+; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: xtn v[[MID:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn2 v[[MID]].4s, v[[RHS]].2d
+; CHECK: xtn v0.4h, v[[MID]].4s
%tmp1 = load <4 x double>, <4 x double>* %ptr
%tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
ret <4 x i16> %tmp2
@@ -15,17 +15,17 @@ define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
; CHECK: fptosi_v4f64_to_v4i8
-; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
-; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
-; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d
-; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
+; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
+; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
-; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
+; CHECK-DAG: xtn2 v[[NA2]].4s, v[[CONV3]].2d
; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
-; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h
-; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h
-; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
+; CHECK-DAG: xtn2 v[[NA0]].4s, v[[CONV1]].2d
+; CHECK-DAG: xtn v[[TMP1:[0-9]+]].4h, v[[NA0]].4s
+; CHECK-DAG: xtn2 v[[TMP1]].8h, v[[NA2]].4s
+; CHECK: xtn v0.8b, v[[TMP1]].8h
%tmp1 = load <8 x double>, <8 x double>* %ptr
%tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
ret <8 x i8> %tmp2
diff --git a/test/CodeGen/AArch64/arm64-dup.ll b/test/CodeGen/AArch64/arm64-dup.ll
index 849e227..c6b7de3 100644
--- a/test/CodeGen/AArch64/arm64-dup.ll
+++ b/test/CodeGen/AArch64/arm64-dup.ll
@@ -321,3 +321,40 @@ entry:
%sub = sub <4 x i16> %a, %mul
ret <4 x i16> %sub
}
+
+; Also test the DUP path in the PerfectShuffle generator.
+
+; CHECK-LABEL: test_perfectshuffle_dupext_v4i16:
+; CHECK-NEXT: dup.4h v0, v0[0]
+; CHECK-NEXT: ext.8b v0, v0, v1, #4
+define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
+ %r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
+ ret <4 x i16> %r
+}
+
+; CHECK-LABEL: test_perfectshuffle_dupext_v4f16:
+; CHECK-NEXT: dup.4h v0, v0[0]
+; CHECK-NEXT: ext.8b v0, v0, v1, #4
+; CHECK-NEXT: ret
+define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b) nounwind {
+ %r = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
+ ret <4 x half> %r
+}
+
+; CHECK-LABEL: test_perfectshuffle_dupext_v4i32:
+; CHECK-NEXT: dup.4s v0, v0[0]
+; CHECK-NEXT: ext.16b v0, v0, v1, #8
+; CHECK-NEXT: ret
+define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+ %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
+ ret <4 x i32> %r
+}
+
+; CHECK-LABEL: test_perfectshuffle_dupext_v4f32:
+; CHECK-NEXT: dup.4s v0, v0[0]
+; CHECK-NEXT: ext.16b v0, v0, v1, #8
+; CHECK-NEXT: ret
+define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float> %b) nounwind {
+ %r = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
+ ret <4 x float> %r
+}
diff --git a/test/CodeGen/AArch64/arm64-fcopysign.ll b/test/CodeGen/AArch64/arm64-fcopysign.ll
index 66241df..feffd41 100644
--- a/test/CodeGen/AArch64/arm64-fcopysign.ll
+++ b/test/CodeGen/AArch64/arm64-fcopysign.ll
@@ -39,7 +39,7 @@ entry:
; CHECK: fcvt s0, d0
; CHECK: movi.4s v[[CONST:[0-9]+]], #0x80, lsl #24
; CHECK: bit.16b v{{[0-9]+}}, v0, v[[CONST]]
- %0 = tail call double (...)* @bar() nounwind
+ %0 = tail call double (...) @bar() nounwind
%1 = fptrunc double %0 to float
%2 = tail call float @copysignf(float 5.000000e-01, float %1) nounwind readnone
%3 = fadd float %1, %2
diff --git a/test/CodeGen/AArch64/arm64-join-reserved.ll b/test/CodeGen/AArch64/arm64-join-reserved.ll
index e99168b..dee0344 100644
--- a/test/CodeGen/AArch64/arm64-join-reserved.ll
+++ b/test/CodeGen/AArch64/arm64-join-reserved.ll
@@ -10,7 +10,7 @@ target triple = "arm64-apple-macosx10"
; CHECK: ret
define void @g() nounwind ssp {
entry:
- tail call void (i32, ...)* @f(i32 0, i32 0) nounwind
+ tail call void (i32, ...) @f(i32 0, i32 0) nounwind
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
new file mode 100644
index 0000000..5bc4d71
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=arm64-apple-ios -aarch64-strict-align < %s | FileCheck %s
+
+; Small (16 bytes here) unaligned memcpys should stay memcpy calls if
+; strict alignment is turned on.
+define void @t0(i8* %out, i8* %in) {
+; CHECK-LABEL: t0:
+; CHECK: orr w2, wzr, #0x10
+; CHECK-NEXT: bl _memcpy
+entry:
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll
index 4a92c3d..b74a406 100644
--- a/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1086,7 +1086,7 @@ define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
-; CHECK-NEXT: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}}
entry:
%c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
%d = insertelement <2 x i32> undef, i32 %c, i32 0
diff --git a/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
new file mode 100644
index 0000000..51ed8a1
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
@@ -0,0 +1,456 @@
+; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V8a
+; RUN: llc < %s -verify-machineinstrs -march=arm64 -mattr=+v8.1a -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V81a
+; RUN: llc < %s -verify-machineinstrs -march=arm64 -mattr=+v8.1a -aarch64-neon-syntax=apple | FileCheck %s --check-prefix=CHECK-V81a-apple
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
+declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32)
+declare i16 @llvm.aarch64.neon.sqrdmulh.i16(i16, i16)
+
+declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
+declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32)
+declare i16 @llvm.aarch64.neon.sqadd.i16(i16, i16)
+
+declare <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
+declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32)
+declare i16 @llvm.aarch64.neon.sqsub.i16(i16, i16)
+
+;-----------------------------------------------------------------------------
+; RDMA Vector
+; test for SIMDThreeSameVectorSQRDMLxHTiedHS
+
+define <4 x i16> @test_sqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v4i16:
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
+ %retval = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.4h
+; CHECK-V81a: sqrdmlah v0.4h, v1.4h, v2.4h
+; CHECK-V81a-apple: sqrdmlah.4h v0, v1, v2
+ ret <4 x i16> %retval
+}
+
+define <8 x i16> @test_sqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v8i16:
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
+ %retval = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc, <8 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.8h
+; CHECK-V81a: sqrdmlah v0.8h, v1.8h, v2.8h
+; CHECK-V81a-apple: sqrdmlah.8h v0, v1, v2
+ ret <8 x i16> %retval
+}
+
+define <2 x i32> @test_sqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v2i32:
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
+ %retval = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %acc, <2 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.2s
+; CHECK-V81a: sqrdmlah v0.2s, v1.2s, v2.2s
+; CHECK-V81a-apple: sqrdmlah.2s v0, v1, v2
+ ret <2 x i32> %retval
+}
+
+define <4 x i32> @test_sqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v4i32:
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
+ %retval = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.4s
+; CHECK-V81a: sqrdmlah v0.4s, v1.4s, v2.4s
+; CHECK-V81a-apple: sqrdmlah.4s v0, v1, v2
+ ret <4 x i32> %retval
+}
+
+define <4 x i16> @test_sqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v4i16:
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
+ %retval = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc, <4 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.4h
+; CHECK-V81a: sqrdmlsh v0.4h, v1.4h, v2.4h
+; CHECK-V81a-apple: sqrdmlsh.4h v0, v1, v2
+ ret <4 x i16> %retval
+}
+
+define <8 x i16> @test_sqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v8i16:
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
+ %retval = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc, <8 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.8h
+; CHECK-V81a: sqrdmlsh v0.8h, v1.8h, v2.8h
+; CHECK-V81a-apple: sqrdmlsh.8h v0, v1, v2
+ ret <8 x i16> %retval
+}
+
+define <2 x i32> @test_sqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v2i32:
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
+ %retval = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %acc, <2 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.2s
+; CHECK-V81a: sqrdmlsh v0.2s, v1.2s, v2.2s
+; CHECK-V81a-apple: sqrdmlsh.2s v0, v1, v2
+ ret <2 x i32> %retval
+}
+
+define <4 x i32> @test_sqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v4i32:
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
+ %retval = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.4s
+; CHECK-V81a: sqrdmlsh v0.4s, v1.4s, v2.4s
+; CHECK-V81a-apple: sqrdmlsh.4s v0, v1, v2
+ ret <4 x i32> %retval
+}
+
+;-----------------------------------------------------------------------------
+; RDMA Vector, by element
+; tests for vXiYY_indexed in SIMDIndexedSQRDMLxHSDTied
+
+define <4 x i16> @test_sqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlah_lane_s16:
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
+ %retval = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.h[3]
+; CHECK-V81a: sqrdmlah v0.4h, v1.4h, v2.h[3]
+; CHECK-V81a-apple: sqrdmlah.4h v0, v1, v2[3]
+ ret <4 x i16> %retval
+}
+
+define <8 x i16> @test_sqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <8 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlahq_lane_s16:
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
+ %retval = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc, <8 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.h[2]
+; CHECK-V81a: sqrdmlah v0.8h, v1.8h, v2.h[2]
+; CHECK-V81a-apple: sqrdmlah.8h v0, v1, v2[2]
+ ret <8 x i16> %retval
+}
+
+define <2 x i32> @test_sqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlah_lane_s32:
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
+ %retval = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %acc, <2 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.s[1]
+; CHECK-V81a: sqrdmlah v0.2s, v1.2s, v2.s[1]
+; CHECK-V81a-apple: sqrdmlah.2s v0, v1, v2[1]
+ ret <2 x i32> %retval
+}
+
+define <4 x i32> @test_sqrdmlahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <4 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlahq_lane_s32:
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
+ %retval = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.s[0]
+; CHECK-V81a: sqrdmlah v0.4s, v1.4s, v2.s[0]
+; CHECK-V81a-apple: sqrdmlah.4s v0, v1, v2[0]
+ ret <4 x i32> %retval
+}
+
+define <4 x i16> @test_sqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlsh_lane_s16:
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
+ %retval = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc, <4 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.h[3]
+; CHECK-V81a: sqrdmlsh v0.4h, v1.4h, v2.h[3]
+; CHECK-V81a-apple: sqrdmlsh.4h v0, v1, v2[3]
+ ret <4 x i16> %retval
+}
+
+define <8 x i16> @test_sqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <8 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlshq_lane_s16:
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
+ %retval = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc, <8 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.h[2]
+; CHECK-V81a: sqrdmlsh v0.8h, v1.8h, v2.h[2]
+; CHECK-V81a-apple: sqrdmlsh.8h v0, v1, v2[2]
+ ret <8 x i16> %retval
+}
+
+define <2 x i32> @test_sqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlsh_lane_s32:
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
+ %retval = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %acc, <2 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.s[1]
+; CHECK-V81a: sqrdmlsh v0.2s, v1.2s, v2.s[1]
+; CHECK-V81a-apple: sqrdmlsh.2s v0, v1, v2[1]
+ ret <2 x i32> %retval
+}
+
+define <4 x i32> @test_sqrdmlshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <4 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlshq_lane_s32:
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
+ %retval = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.s[0]
+; CHECK-V81a: sqrdmlsh v0.4s, v1.4s, v2.s[0]
+; CHECK-V81a-apple: sqrdmlsh.4s v0, v1, v2[0]
+ ret <4 x i32> %retval
+}
+
+;-----------------------------------------------------------------------------
+; RDMA Vector, by element, extracted
+; i16 tests are for vXi16_indexed in SIMDIndexedSQRDMLxHSDTied, with IR in ACLE style
+; i32 tests are for "def : Pat" in SIMDIndexedSQRDMLxHSDTied
+
+define i16 @test_sqrdmlah_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlah_extracted_lane_s16:
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1>
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
+ %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod)
+ %retval = extractelement <4 x i16> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, v0.4h, v1.h[1]
+; CHECK-V81a: sqrdmlah {{v[2-9]+}}.4h, v0.4h, v1.h[1]
+; CHECK-V81a-apple: sqrdmlah.4h {{v[2-9]+}}, v0, v1[1]
+ ret i16 %retval
+}
+
+define i16 @test_sqrdmlahq_extracted_lane_s16(i16 %acc,<8 x i16> %x, <8 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlahq_extracted_lane_s16:
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1, i32 1,i32 1,i32 1,i32 1>
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
+ %acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod)
+ %retval = extractelement <8 x i16> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.8h, v0.8h, v1.h[1]
+; CHECK-V81a: sqrdmlah {{v[2-9]+}}.8h, v0.8h, v1.h[1]
+; CHECK-V81a-apple: sqrdmlah.8h {{v[2-9]+}}, v0, v1[1]
+ ret i16 %retval
+}
+
+define i32 @test_sqrdmlah_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlah_extracted_lane_s32:
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
+ %extract = extractelement <2 x i32> %prod, i64 0
+ %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %extract)
+; CHECK-V8a: sqrdmulh v0.2s, v0.2s, v1.s[0]
+; CHECK-V81a: sqrdmlah v2.2s, v0.2s, v1.s[0]
+; CHECK-V81a-apple: sqrdmlah.2s v2, v0, v1[0]
+ ret i32 %retval
+}
+
+define i32 @test_sqrdmlahq_extracted_lane_s32(i32 %acc,<4 x i32> %x, <4 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlahq_extracted_lane_s32:
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
+ %extract = extractelement <4 x i32> %prod, i64 0
+ %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %extract)
+; CHECK-V8a: sqrdmulh v0.4s, v0.4s, v1.s[0]
+; CHECK-V81a: sqrdmlah v2.4s, v0.4s, v1.s[0]
+; CHECK-V81a-apple: sqrdmlah.4s v2, v0, v1[0]
+ ret i32 %retval
+}
+
+define i16 @test_sqrdmlsh_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s16:
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1>
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
+ %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod)
+ %retval = extractelement <4 x i16> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, v0.4h, v1.h[1]
+; CHECK-V81a: sqrdmlsh {{v[2-9]+}}.4h, v0.4h, v1.h[1]
+; CHECK-V81a-apple: sqrdmlsh.4h {{v[2-9]+}}, v0, v1[1]
+ ret i16 %retval
+}
+
+define i16 @test_sqrdmlshq_extracted_lane_s16(i16 %acc,<8 x i16> %x, <8 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlshq_extracted_lane_s16:
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1, i32 1,i32 1,i32 1,i32 1>
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
+ %acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod)
+ %retval = extractelement <8 x i16> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.8h, v0.8h, v1.h[1]
+; CHECK-V81a: sqrdmlsh {{v[2-9]+}}.8h, v0.8h, v1.h[1]
+; CHECK-V81a-apple: sqrdmlsh.8h {{v[2-9]+}}, v0, v1[1]
+ ret i16 %retval
+}
+
+define i32 @test_sqrdmlsh_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s32:
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
+ %extract = extractelement <2 x i32> %prod, i64 0
+ %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %extract)
+; CHECK-V8a: sqrdmulh v0.2s, v0.2s, v1.s[0]
+; CHECK-V81a: sqrdmlsh v2.2s, v0.2s, v1.s[0]
+; CHECK-V81a-apple: sqrdmlsh.2s v2, v0, v1[0]
+ ret i32 %retval
+}
+
+define i32 @test_sqrdmlshq_extracted_lane_s32(i32 %acc,<4 x i32> %x, <4 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlshq_extracted_lane_s32:
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
+ %extract = extractelement <4 x i32> %prod, i64 0
+ %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %extract)
+; CHECK-V8a: sqrdmulh v0.4s, v0.4s, v1.s[0]
+; CHECK-V81a: sqrdmlsh v2.4s, v0.4s, v1.s[0]
+; CHECK-V81a-apple: sqrdmlsh.4s v2, v0, v1[0]
+ ret i32 %retval
+}
+
+;-----------------------------------------------------------------------------
+; RDMA Scalar
+; test for "def : Pat" near SIMDThreeScalarHSTied in AArch64InstrInfo.td
+
+define i16 @test_sqrdmlah_v1i16(i16 %acc, i16 %x, i16 %y) {
+; CHECK-LABEL: test_sqrdmlah_v1i16:
+ %x_vec = insertelement <4 x i16> undef, i16 %x, i64 0
+ %y_vec = insertelement <4 x i16> undef, i16 %y, i64 0
+ %prod_vec = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %y_vec)
+ %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod_vec)
+ %retval = extractelement <4 x i16> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-V81a: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-V81a-apple: sqrdmlah.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+ ret i16 %retval
+}
+
+define i32 @test_sqrdmlah_v1i32(i32 %acc, i32 %x, i32 %y) {
+; CHECK-LABEL: test_sqrdmlah_v1i32:
+ %x_vec = insertelement <4 x i32> undef, i32 %x, i64 0
+ %y_vec = insertelement <4 x i32> undef, i32 %y, i64 0
+ %prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec, <4 x i32> %y_vec)
+ %acc_vec = insertelement <4 x i32> undef, i32 %acc, i64 0
+ %retval_vec = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc_vec, <4 x i32> %prod_vec)
+ %retval = extractelement <4 x i32> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-V81a: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-V81a-apple: sqrdmlah.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+ ret i32 %retval
+}
+
+
+define i16 @test_sqrdmlsh_v1i16(i16 %acc, i16 %x, i16 %y) {
+; CHECK-LABEL: test_sqrdmlsh_v1i16:
+ %x_vec = insertelement <4 x i16> undef, i16 %x, i64 0
+ %y_vec = insertelement <4 x i16> undef, i16 %y, i64 0
+ %prod_vec = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %y_vec)
+ %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod_vec)
+ %retval = extractelement <4 x i16> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-V81a: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-V81a-apple: sqrdmlsh.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+ ret i16 %retval
+}
+
+define i32 @test_sqrdmlsh_v1i32(i32 %acc, i32 %x, i32 %y) {
+; CHECK-LABEL: test_sqrdmlsh_v1i32:
+ %x_vec = insertelement <4 x i32> undef, i32 %x, i64 0
+ %y_vec = insertelement <4 x i32> undef, i32 %y, i64 0
+ %prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec, <4 x i32> %y_vec)
+ %acc_vec = insertelement <4 x i32> undef, i32 %acc, i64 0
+ %retval_vec = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc_vec, <4 x i32> %prod_vec)
+ %retval = extractelement <4 x i32> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-V81a: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-V81a-apple: sqrdmlsh.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+ ret i32 %retval
+}
+define i32 @test_sqrdmlah_i32(i32 %acc, i32 %mhs, i32 %rhs) {
+; CHECK-LABEL: test_sqrdmlah_i32:
+ %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %rhs)
+ %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %prod)
+; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-V81a: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-V81a-apple: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret i32 %retval
+}
+
+define i32 @test_sqrdmlsh_i32(i32 %acc, i32 %mhs, i32 %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_i32:
+ %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %rhs)
+ %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %prod)
+; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-V81a: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-V81a-apple: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret i32 %retval
+}
+
+;-----------------------------------------------------------------------------
+; RDMA Scalar, by element
+; i16 tests are covered by the tests in the section above, with IR in ACLE style
+; i32 tests are for i32_indexed in SIMDIndexedSQRDMLxHSDTied
+
+define i16 @test_sqrdmlah_extract_i16(i16 %acc, i16 %x, <4 x i16> %y_vec) {
+; CHECK-LABEL: test_sqrdmlah_extract_i16:
+ %shuffle = shufflevector <4 x i16> %y_vec, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1>
+ %x_vec = insertelement <4 x i16> undef, i16 %x, i64 0
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %shuffle)
+ %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod)
+ %retval = extractelement <4 x i16> %retval_vec, i32 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v0.h[1]
+; CHECK-V81a: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v0.h[1]
+; CHECK-V81a-apple: sqrdmlah.4h {{v[0-9]+}}, {{v[0-9]+}}, v0[1]
+ ret i16 %retval
+}
+
+define i32 @test_sqrdmlah_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_extract_i32:
+ %extract = extractelement <4 x i32> %rhs, i32 3
+ %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %extract)
+ %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %prod)
+; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
+; CHECK-V81a: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
+; CHECK-V81a-apple: sqrdmlah.s {{s[0-9]+}}, {{s[0-9]+}}, v0[3]
+ ret i32 %retval
+}
+
+define i16 @test_sqrdmlshq_extract_i16(i16 %acc, i16 %x, <8 x i16> %y_vec) {
+; CHECK-LABEL: test_sqrdmlshq_extract_i16:
+ %shuffle = shufflevector <8 x i16> %y_vec, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1,i32 1,i32 1,i32 1,i32 1>
+ %x_vec = insertelement <8 x i16> undef, i16 %x, i64 0
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x_vec, <8 x i16> %shuffle)
+ %acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod)
+ %retval = extractelement <8 x i16> %retval_vec, i32 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v0.h[1]
+; CHECK-V81a: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v0.h[1]
+; CHECK-V81a-apple: sqrdmlsh.8h {{v[0-9]+}}, {{v[0-9]+}}, v0[1]
+ ret i16 %retval
+}
+
+define i32 @test_sqrdmlsh_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_extract_i32:
+ %extract = extractelement <4 x i32> %rhs, i32 3
+ %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %extract)
+ %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %prod)
+; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
+; CHECK-V81a: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
+; CHECK-V81a-apple: sqrdmlsh.s {{s[0-9]+}}, {{s[0-9]+}}, v0[3]
+ ret i32 %retval
+}
diff --git a/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll b/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll
index 5a740d8..2651f11 100644
--- a/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll
+++ b/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll
@@ -9,7 +9,7 @@
define void @clobberScratch(i32* %p) {
%v = load i32, i32* %p
tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 5, i32 20, i8* null, i32 0, i32* %p, i32 %v)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 5, i32 20, i8* null, i32 0, i32* %p, i32 %v)
store i32 %v, i32* %p
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
index 8f79f80..b8236c5 100644
--- a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
+++ b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
@@ -23,9 +23,9 @@ entry:
; FAST-NEXT: movk x16, #0xbeef
; FAST-NEXT: blr x16
%resolveCall2 = inttoptr i64 281474417671919 to i8*
- %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
+ %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
%resolveCall3 = inttoptr i64 244837814038255 to i8*
- tail call webkit_jscc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
+ tail call webkit_jscc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
ret void
}
@@ -59,7 +59,7 @@ entry:
; FAST-NEXT: movk x16, #0xbeef
; FAST-NEXT: blr x16
%call = inttoptr i64 281474417671919 to i8*
- %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
+ %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
ret i64 %result
}
@@ -101,7 +101,7 @@ entry:
; FAST-NEXT: movk x16, #0xbeef
; FAST-NEXT: blr x16
%call = inttoptr i64 281474417671919 to i8*
- %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
+ %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
ret i64 %result
}
diff --git a/test/CodeGen/AArch64/arm64-patchpoint.ll b/test/CodeGen/AArch64/arm64-patchpoint.ll
index cf06653..d9ec7e5 100644
--- a/test/CodeGen/AArch64/arm64-patchpoint.ll
+++ b/test/CodeGen/AArch64/arm64-patchpoint.ll
@@ -16,9 +16,9 @@ entry:
; CHECK-NEXT: blr x16
; CHECK: ret
%resolveCall2 = inttoptr i64 244837814094590 to i8*
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
%resolveCall3 = inttoptr i64 244837814094591 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 20, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 20, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
ret i64 %result
}
@@ -38,7 +38,7 @@ entry:
store i64 11, i64* %metadata
store i64 12, i64* %metadata
store i64 13, i64* %metadata
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
ret void
}
@@ -51,14 +51,14 @@ entry:
%tmp80 = add i64 %tmp79, -16
%tmp81 = inttoptr i64 %tmp80 to i64*
%tmp82 = load i64, i64* %tmp81, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
%tmp83 = load i64, i64* %tmp33, align 8
%tmp84 = add i64 %tmp83, -24
%tmp85 = inttoptr i64 %tmp84 to i64*
%tmp86 = load i64, i64* %tmp85, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
ret i64 10
}
@@ -74,7 +74,7 @@ entry:
; CHECK-NEXT: nop
; CHECK-NEXT: ldp
; CHECK-NEXT: ret
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-stackmap-nops.ll b/test/CodeGen/AArch64/arm64-stackmap-nops.ll
index 5915b64..2647ac4 100644
--- a/test/CodeGen/AArch64/arm64-stackmap-nops.ll
+++ b/test/CodeGen/AArch64/arm64-stackmap-nops.ll
@@ -8,7 +8,7 @@ entry:
; CHECK: nop
; CHECK-NEXT: nop
; CHECK-NOT: nop
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 16)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 16)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-stackmap.ll b/test/CodeGen/AArch64/arm64-stackmap.ll
index 29e4484..1a4df7a 100644
--- a/test/CodeGen/AArch64/arm64-stackmap.ll
+++ b/test/CodeGen/AArch64/arm64-stackmap.ll
@@ -78,7 +78,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
define void @constantargs() {
entry:
%0 = inttoptr i64 244837814094590 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 20, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 20, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
ret void
}
@@ -100,7 +100,7 @@ entry:
; Runtime void->void call.
call void inttoptr (i64 244837814094590 to void ()*)()
; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
ret void
}
@@ -126,7 +126,7 @@ entry:
cold:
; OSR patchpoint with 12-byte nop-slide and 2 live vars.
%thunk = inttoptr i64 244837814094590 to i8*
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4, i32 20, i8* %thunk, i32 0, i64 %a, i64 %b)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4, i32 20, i8* %thunk, i32 0, i64 %a, i64 %b)
unreachable
ret:
ret void
@@ -142,7 +142,7 @@ ret:
define i64 @propertyRead(i64* %obj) {
entry:
%resolveRead = inttoptr i64 244837814094590 to i8*
- %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveRead, i32 1, i64* %obj)
+ %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveRead, i32 1, i64* %obj)
%add = add i64 %result, 3
ret i64 %add
}
@@ -162,7 +162,7 @@ entry:
define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
entry:
%resolveWrite = inttoptr i64 244837814094590 to i8*
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
ret void
}
@@ -184,7 +184,7 @@ entry:
define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 244837814094590 to i8*
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 7, i32 20, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 7, i32 20, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
ret void
}
@@ -206,7 +206,7 @@ entry:
define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 244837814094590 to i8*
- %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 8, i32 20, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+ %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 8, i32 20, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
%add = add i64 %result, 3
ret i64 %add
}
@@ -226,7 +226,7 @@ entry:
; CHECK-NEXT: .short 29
define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) {
entry:
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 11, i32 20, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 20, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
ret void
}
@@ -245,7 +245,7 @@ entry:
; CHECK-NEXT: .short 29
define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29) {
entry:
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
ret void
}
@@ -263,7 +263,7 @@ entry:
; CHECK-NEXT: .long 33
define void @liveConstant() {
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 8, i32 33)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 8, i32 33)
ret void
}
@@ -280,7 +280,7 @@ define void @liveConstant() {
; CHECK-NEXT: .long -{{[0-9]+}}
define void @clobberLR(i32 %a) {
tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x31}"() nounwind
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-vshuffle.ll b/test/CodeGen/AArch64/arm64-vshuffle.ll
index 75e0d80..15ea21b 100644
--- a/test/CodeGen/AArch64/arm64-vshuffle.ll
+++ b/test/CodeGen/AArch64/arm64-vshuffle.ll
@@ -1,22 +1,8 @@
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -mcpu=cyclone | FileCheck %s
-; The mask:
-; CHECK: lCPI0_0:
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; The second vector is legalized to undef and the elements of the first vector
-; are used instead.
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 4 ; 0x4
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 0 ; 0x0
; CHECK: test1
-; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI0_0
-; CHECK: movi.8h v[[REG1:[0-9]+]], #0x1, lsl #8
-; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
+; CHECK: movi d[[REG0:[0-9]+]], #0000000000000000
define <8 x i1> @test1() {
entry:
%Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
@@ -30,18 +16,16 @@ entry:
; CHECK: lCPI1_0:
; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 7 ; 0x7
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 1 ; 0x1
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
; CHECK: test2
-; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI1_0@PAGEOFF]
-; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_1@PAGE
-; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG2]], lCPI1_1@PAGEOFF]
-; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
+; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_0@PAGE
+; CHECK: ldr d[[REG1:[0-9]+]], [x[[REG2]], lCPI1_0@PAGEOFF]
define <8 x i1>@test2() {
bb:
%Shuff = shufflevector <8 x i1> zeroinitializer,
@@ -51,28 +35,8 @@ bb:
ret <8 x i1> %Shuff
}
-; CHECK: lCPI2_0:
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 0 ; 0x0
; CHECK: test3
-; CHECK: adrp x[[REG3:[0-9]+]], lCPI2_0@PAGE
-; CHECK: ldr q[[REG0:[0-9]+]], [x[[REG3]], lCPI2_0@PAGEOFF]
-; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG3]], lCPI2_1@PAGEOFF]
-; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
+; CHECK: movi.4s v{{[0-9]+}}, #0x1
define <16 x i1> @test3(i1* %ptr, i32 %v) {
bb:
%Shuff = shufflevector <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <16 x i1> undef,
@@ -81,29 +45,26 @@ bb:
i32 14, i32 0>
ret <16 x i1> %Shuff
}
-; CHECK: lCPI3_1:
+; CHECK: lCPI3_0:
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 1 ; 0x1
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 18 ; 0x12
-; CHECK: .byte 4 ; 0x4
-; CHECK: .byte 5 ; 0x5
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 7 ; 0x7
-; CHECK: .byte 8 ; 0x8
-; CHECK: .byte 31 ; 0x1f
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 30 ; 0x1e
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 13 ; 0xd
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 15 ; 0xf
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
; CHECK: _test4:
-; CHECK: ldr q[[REG1:[0-9]+]]
-; CHECK: movi.2d v[[REG0:[0-9]+]], #0000000000000000
-; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_1@PAGE
-; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_1@PAGEOFF]
-; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG0]], v[[REG1]] }, v[[REG2]]
+; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_0@PAGE
+; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_0@PAGEOFF]
define <16 x i1> @test4(i1* %ptr, i32 %v) {
bb:
%Shuff = shufflevector <16 x i1> zeroinitializer,
diff --git a/test/CodeGen/AArch64/bitcast.ll b/test/CodeGen/AArch64/bitcast.ll
new file mode 100644
index 0000000..e88ea9e
--- /dev/null
+++ b/test/CodeGen/AArch64/bitcast.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s
+
+; PR23065: SCALAR_TO_VECTOR implies the top elements 1 to N-1 of the N-element vector are undefined.
+
+define <4 x i16> @foo1(<2 x i32> %a) {
+; CHECK-LABEL: foo1:
+; CHECK: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+
+ %1 = shufflevector <2 x i32> <i32 58712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
+; Can't optimize the following bitcast to scalar_to_vector.
+ %2 = bitcast <2 x i32> %1 to <4 x i16>
+ %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ ret <4 x i16> %3
+}
+
+define <4 x i16> @foo2(<2 x i32> %a) {
+; CHECK-LABEL: foo2:
+; CHECK: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+
+ %1 = shufflevector <2 x i32> <i32 712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
+; Can't optimize the following bitcast to scalar_to_vector.
+ %2 = bitcast <2 x i32> %1 to <4 x i16>
+ %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ ret <4 x i16> %3
+}
diff --git a/test/CodeGen/AArch64/br-to-eh-lpad.ll b/test/CodeGen/AArch64/br-to-eh-lpad.ll
index e948b87..f304ba4 100644
--- a/test/CodeGen/AArch64/br-to-eh-lpad.ll
+++ b/test/CodeGen/AArch64/br-to-eh-lpad.ll
@@ -30,12 +30,12 @@ invoke.cont7:
unreachable
if.end50.thread:
- tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str1, i64 0, i64 0), i32 125)
- tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str1, i64 0, i64 0), i32 128)
+ tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str1, i64 0, i64 0), i32 125)
+ tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str1, i64 0, i64 0), i32 128)
unreachable
invoke.cont33:
- tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str1, i64 0, i64 0), i32 119)
+ tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str1, i64 0, i64 0), i32 119)
unreachable
invoke.cont41:
@@ -51,7 +51,7 @@ lpad40:
br label %finally.catchall
finally.catchall:
- tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str1, i64 0, i64 0), i32 125)
+ tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str1, i64 0, i64 0), i32 125)
unreachable
}
diff --git a/test/CodeGen/AArch64/concat_vector-scalar-combine.ll b/test/CodeGen/AArch64/concat_vector-scalar-combine.ll
new file mode 100644
index 0000000..1c64af6
--- /dev/null
+++ b/test/CodeGen/AArch64/concat_vector-scalar-combine.ll
@@ -0,0 +1,125 @@
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Test the (concat_vectors (bitcast (scalar)), ..) pattern.
+
+define <8 x i8> @test_concat_scalar_v2i8_to_v8i8_dup(i32 %x) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalar_v2i8_to_v8i8_dup:
+; CHECK-NEXT: dup.4h v0, w0
+; CHECK-NEXT: ret
+ %t = trunc i32 %x to i16
+ %0 = bitcast i16 %t to <2 x i8>
+ %1 = shufflevector <2 x i8> %0, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ ret <8 x i8> %1
+}
+
+define <8 x i8> @test_concat_scalar_v4i8_to_v8i8_dup(i32 %x) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalar_v4i8_to_v8i8_dup:
+; CHECK-NEXT: dup.2s v0, w0
+; CHECK-NEXT: ret
+ %0 = bitcast i32 %x to <4 x i8>
+ %1 = shufflevector <4 x i8> %0, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ ret <8 x i8> %1
+}
+
+define <8 x i16> @test_concat_scalar_v2i16_to_v8i16_dup(i32 %x) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalar_v2i16_to_v8i16_dup:
+; CHECK-NEXT: dup.4s v0, w0
+; CHECK-NEXT: ret
+ %0 = bitcast i32 %x to <2 x i16>
+ %1 = shufflevector <2 x i16> %0, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
+ ret <8 x i16> %1
+}
+
+define <8 x i8> @test_concat_scalars_2x_v2i8_to_v8i8(i32 %x, i32 %y) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalars_2x_v2i8_to_v8i8:
+; CHECK-NEXT: ins.h v0[0], w0
+; CHECK-NEXT: ins.h v0[1], w1
+; CHECK-NEXT: ins.h v0[3], w1
+; CHECK-NEXT: ret
+ %tx = trunc i32 %x to i16
+ %ty = trunc i32 %y to i16
+ %bx = bitcast i16 %tx to <2 x i8>
+ %by = bitcast i16 %ty to <2 x i8>
+ %r = shufflevector <2 x i8> %bx, <2 x i8> %by, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 2, i32 3>
+ ret <8 x i8> %r
+}
+
+define <8 x i8> @test_concat_scalars_2x_v4i8_to_v8i8_dup(i32 %x, i32 %y) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalars_2x_v4i8_to_v8i8_dup:
+; CHECK-NEXT: fmov s0, w1
+; CHECK-NEXT: ins.s v0[1], w0
+; CHECK-NEXT: ret
+ %bx = bitcast i32 %x to <4 x i8>
+ %by = bitcast i32 %y to <4 x i8>
+ %r = shufflevector <4 x i8> %bx, <4 x i8> %by, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+ ret <8 x i8> %r
+}
+
+define <8 x i16> @test_concat_scalars_2x_v2i16_to_v8i16_dup(i32 %x, i32 %y) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalars_2x_v2i16_to_v8i16_dup:
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ins.s v0[1], w1
+; CHECK-NEXT: ins.s v0[2], w1
+; CHECK-NEXT: ins.s v0[3], w0
+; CHECK-NEXT: ret
+ %bx = bitcast i32 %x to <2 x i16>
+ %by = bitcast i32 %y to <2 x i16>
+ %r = shufflevector <2 x i16> %bx, <2 x i16> %by, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1>
+ ret <8 x i16> %r
+}
+
+; Also make sure we minimize bitcasts.
+
+; This is a pretty artificial testcase: make sure we bitcast to floating-point
+; if any of the scalars is floating-point.
+define <8 x i8> @test_concat_scalars_mixed_2x_v2i8_to_v8i8(float %dummy, i32 %x, half %y) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalars_mixed_2x_v2i8_to_v8i8:
+; CHECK-NEXT: fmov s[[X:[0-9]+]], w0
+; CHECK-NEXT: ins.h v0[0], v[[X]][0]
+; CHECK-NEXT: ins.h v0[1], v1[0]
+; CHECK-NEXT: ins.h v0[2], v[[X]][0]
+; CHECK-NEXT: ins.h v0[3], v1[0]
+; CHECK-NEXT: ret
+ %t = trunc i32 %x to i16
+ %0 = bitcast i16 %t to <2 x i8>
+ %y0 = bitcast half %y to <2 x i8>
+ %1 = shufflevector <2 x i8> %0, <2 x i8> %y0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ ret <8 x i8> %1
+}
+
+define <2 x float> @test_concat_scalars_fp_2x_v2i8_to_v8i8(float %dummy, half %x, half %y) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalars_fp_2x_v2i8_to_v8i8:
+; CHECK-NEXT: ins.h v0[0], v1[0]
+; CHECK-NEXT: ins.h v0[1], v2[0]
+; CHECK-NEXT: ins.h v0[2], v1[0]
+; CHECK-NEXT: ins.h v0[3], v2[0]
+; CHECK-NEXT: ret
+ %0 = bitcast half %x to <2 x i8>
+ %y0 = bitcast half %y to <2 x i8>
+ %1 = shufflevector <2 x i8> %0, <2 x i8> %y0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = bitcast <8 x i8> %1 to <2 x float>
+ ret <2 x float> %2
+}
+
+define <4 x float> @test_concat_scalar_fp_v2i16_to_v16i8_dup(float %x) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalar_fp_v2i16_to_v16i8_dup:
+; CHECK-NEXT: dup.4s v0, v0[0]
+; CHECK-NEXT: ret
+ %0 = bitcast float %x to <2 x i16>
+ %1 = shufflevector <2 x i16> %0, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
+ %2 = bitcast <8 x i16> %1 to <4 x float>
+ ret <4 x float> %2
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/concat_vector-truncate-combine.ll b/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
index c510e27..ee52786 100644
--- a/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
+++ b/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
@@ -2,6 +2,8 @@
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+; Test the (concat_vectors (trunc), (trunc)) pattern.
+
define <4 x i16> @test_concat_truncate_v2i64_to_v4i16(<2 x i64> %a, <2 x i64> %b) #0 {
entry:
; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i16:
diff --git a/test/CodeGen/AArch64/concat_vector-truncated-scalar-combine.ll b/test/CodeGen/AArch64/concat_vector-truncated-scalar-combine.ll
new file mode 100644
index 0000000..eb6c80d
--- /dev/null
+++ b/test/CodeGen/AArch64/concat_vector-truncated-scalar-combine.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -asm-verbose=false | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Test the (concat_vectors (bitcast (trunc (scalar))), undef..) pattern.
+
+define <8 x i8> @test_concat_from_truncated_scalar(i32 %x) #0 {
+entry:
+; CHECK-LABEL: test_concat_from_truncated_scalar:
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ret
+ %t = trunc i32 %x to i16
+ %0 = bitcast i16 %t to <2 x i8>
+ %1 = shufflevector <2 x i8> %0, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i8> %1
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/dag-combine-invaraints.ll b/test/CodeGen/AArch64/dag-combine-invaraints.ll
index 3614133..ac2d057 100644
--- a/test/CodeGen/AArch64/dag-combine-invaraints.ll
+++ b/test/CodeGen/AArch64/dag-combine-invaraints.ll
@@ -20,7 +20,7 @@ main_:
%DHSelect = select i1 %tmp8, i32 %tmp9, i32 %tmp10
store i32 %DHSelect, i32* %i32X, align 4
%tmp15 = load i32, i32* %i32X, align 4
- %tmp17 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str2, i32 0, i32 0), i32 %tmp15)
+ %tmp17 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str2, i32 0, i32 0), i32 %tmp15)
ret i32 0
; CHECK: main:
diff --git a/test/CodeGen/AArch64/f16-instructions.ll b/test/CodeGen/AArch64/f16-instructions.ll
new file mode 100644
index 0000000..be5e2e5
--- /dev/null
+++ b/test/CodeGen/AArch64/f16-instructions.ll
@@ -0,0 +1,765 @@
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: test_fadd:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fadd(half %a, half %b) #0 {
+ %r = fadd half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fsub:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fsub s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fsub(half %a, half %b) #0 {
+ %r = fsub half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fmul:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fmul s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fmul(half %a, half %b) #0 {
+ %r = fmul half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fdiv:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fdiv s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fdiv(half %a, half %b) #0 {
+ %r = fdiv half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_frem:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: bl {{_?}}fmodf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_frem(half %a, half %b) #0 {
+ %r = frem half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_store:
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+define void @test_store(half %a, half* %b) #0 {
+ store half %a, half* %b
+ ret void
+}
+
+; CHECK-LABEL: test_load:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ret
+define half @test_load(half* %a) #0 {
+ %r = load half, half* %a
+ ret half %r
+}
+
+
+declare half @test_callee(half %a, half %b) #0
+
+; CHECK-LABEL: test_call:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: bl {{_?}}test_callee
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_call(half %a, half %b) #0 {
+ %r = call half @test_callee(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_call_flipped:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: mov.16b v2, v0
+; CHECK-NEXT: mov.16b v0, v1
+; CHECK-NEXT: mov.16b v1, v2
+; CHECK-NEXT: bl {{_?}}test_callee
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_call_flipped(half %a, half %b) #0 {
+ %r = call half @test_callee(half %b, half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_tailcall_flipped:
+; CHECK-NEXT: mov.16b v2, v0
+; CHECK-NEXT: mov.16b v0, v1
+; CHECK-NEXT: mov.16b v1, v2
+; CHECK-NEXT: b {{_?}}test_callee
+define half @test_tailcall_flipped(half %a, half %b) #0 {
+ %r = tail call half @test_callee(half %b, half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_select:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
+ %r = select i1 %c, half %a, half %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_select_cc:
+; CHECK-DAG: fcvt s3, h3
+; CHECK-DAG: fcvt s2, h2
+; CHECK-DAG: fcvt s1, h1
+; CHECK-DAG: fcvt s0, h0
+; CHECK-DAG: fcmp s2, s3
+; CHECK-DAG: cset [[CC:w[0-9]+]], ne
+; CHECK-DAG: cmp [[CC]], #0
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
+ %cc = fcmp une half %c, %d
+ %r = select i1 %cc, half %a, half %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fcmp_une:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+define i1 @test_fcmp_une(half %a, half %b) #0 {
+ %r = fcmp une half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ueq:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: orr [[TRUE:w[0-9]+]], wzr, #0x1
+; CHECK-NEXT: csel [[CC:w[0-9]+]], [[TRUE]], wzr, eq
+; CHECK-NEXT: csel w0, [[TRUE]], [[CC]], vs
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ueq(half %a, half %b) #0 {
+ %r = fcmp ueq half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ugt:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ugt(half %a, half %b) #0 {
+ %r = fcmp ugt half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_uge:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, pl
+; CHECK-NEXT: ret
+define i1 @test_fcmp_uge(half %a, half %b) #0 {
+ %r = fcmp uge half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ult:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ult(half %a, half %b) #0 {
+ %r = fcmp ult half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ule:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, le
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ule(half %a, half %b) #0 {
+ %r = fcmp ule half %a, %b
+ ret i1 %r
+}
+
+
+; CHECK-LABEL: test_fcmp_uno:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, vs
+; CHECK-NEXT: ret
+define i1 @test_fcmp_uno(half %a, half %b) #0 {
+ %r = fcmp uno half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_one:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: orr [[TRUE:w[0-9]+]], wzr, #0x1
+; CHECK-NEXT: csel [[CC:w[0-9]+]], [[TRUE]], wzr, mi
+; CHECK-NEXT: csel w0, [[TRUE]], [[CC]], gt
+; CHECK-NEXT: ret
+define i1 @test_fcmp_one(half %a, half %b) #0 {
+ %r = fcmp one half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_oeq:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+define i1 @test_fcmp_oeq(half %a, half %b) #0 {
+ %r = fcmp oeq half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ogt:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ogt(half %a, half %b) #0 {
+ %r = fcmp ogt half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_oge:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, ge
+; CHECK-NEXT: ret
+define i1 @test_fcmp_oge(half %a, half %b) #0 {
+ %r = fcmp oge half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_olt:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+define i1 @test_fcmp_olt(half %a, half %b) #0 {
+ %r = fcmp olt half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ole:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, ls
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ole(half %a, half %b) #0 {
+ %r = fcmp ole half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ord:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, vc
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ord(half %a, half %b) #0 {
+ %r = fcmp ord half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_br_cc:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: b.mi [[BRCC_ELSE:.?LBB[0-9_]+]]
+; CHECK-NEXT: str wzr, [x0]
+; CHECK-NEXT: ret
+; CHECK-NEXT: [[BRCC_ELSE]]:
+; CHECK-NEXT: str wzr, [x1]
+; CHECK-NEXT: ret
+define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 {
+ %c = fcmp uge half %a, %b
+ br i1 %c, label %then, label %else
+then:
+ store i32 0, i32* %p1
+ ret void
+else:
+ store i32 0, i32* %p2
+ ret void
+}
+
+; CHECK-LABEL: test_phi:
+; CHECK: mov x[[PTR:[0-9]+]], x0
+; CHECK: ldr h[[AB:[0-9]+]], [x[[PTR]]]
+; CHECK: [[LOOP:LBB[0-9_]+]]:
+; CHECK: mov.16b v[[R:[0-9]+]], v[[AB]]
+; CHECK: ldr h[[AB]], [x[[PTR]]]
+; CHECK: mov x0, x[[PTR]]
+; CHECK: bl {{_?}}test_dummy
+; CHECK: mov.16b v0, v[[R]]
+; CHECK: ret
+define half @test_phi(half* %p1) #0 {
+entry:
+ %a = load half, half* %p1
+ br label %loop
+loop:
+ %r = phi half [%a, %entry], [%b, %loop]
+ %b = load half, half* %p1
+ %c = call i1 @test_dummy(half* %p1)
+ br i1 %c, label %loop, label %return
+return:
+ ret half %r
+}
+declare i1 @test_dummy(half* %p1) #0
+
+; CHECK-LABEL: test_fptosi_i32:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvtzs w0, s0
+; CHECK-NEXT: ret
+define i32 @test_fptosi_i32(half %a) #0 {
+ %r = fptosi half %a to i32
+ ret i32 %r
+}
+
+; CHECK-LABEL: test_fptosi_i64:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvtzs x0, s0
+; CHECK-NEXT: ret
+define i64 @test_fptosi_i64(half %a) #0 {
+ %r = fptosi half %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: test_fptoui_i32:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvtzu w0, s0
+; CHECK-NEXT: ret
+define i32 @test_fptoui_i32(half %a) #0 {
+ %r = fptoui half %a to i32
+ ret i32 %r
+}
+
+; CHECK-LABEL: test_fptoui_i64:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvtzu x0, s0
+; CHECK-NEXT: ret
+define i64 @test_fptoui_i64(half %a) #0 {
+ %r = fptoui half %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: test_uitofp_i32:
+; CHECK-NEXT: ucvtf s0, w0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_uitofp_i32(i32 %a) #0 {
+ %r = uitofp i32 %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_uitofp_i64:
+; CHECK-NEXT: ucvtf s0, x0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_uitofp_i64(i64 %a) #0 {
+ %r = uitofp i64 %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_sitofp_i32:
+; CHECK-NEXT: scvtf s0, w0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_sitofp_i32(i32 %a) #0 {
+ %r = sitofp i32 %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_sitofp_i64:
+; CHECK-NEXT: scvtf s0, x0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_sitofp_i64(i64 %a) #0 {
+ %r = sitofp i64 %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_fptrunc_float:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fptrunc_float(float %a) #0 {
+ %r = fptrunc float %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_fptrunc_double:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: ret
+define half @test_fptrunc_double(double %a) #0 {
+ %r = fptrunc double %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_fpext_float:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: ret
+define float @test_fpext_float(half %a) #0 {
+ %r = fpext half %a to float
+ ret float %r
+}
+
+; CHECK-LABEL: test_fpext_double:
+; CHECK-NEXT: fcvt d0, h0
+; CHECK-NEXT: ret
+define double @test_fpext_double(half %a) #0 {
+ %r = fpext half %a to double
+ ret double %r
+}
+
+
+; CHECK-LABEL: test_bitcast_halftoi16:
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+define i16 @test_bitcast_halftoi16(half %a) #0 {
+ %r = bitcast half %a to i16
+ ret i16 %r
+}
+
+; CHECK-LABEL: test_bitcast_i16tohalf:
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ret
+define half @test_bitcast_i16tohalf(i16 %a) #0 {
+ %r = bitcast i16 %a to half
+ ret half %r
+}
+
+
+declare half @llvm.sqrt.f16(half %a) #0
+declare half @llvm.powi.f16(half %a, i32 %b) #0
+declare half @llvm.sin.f16(half %a) #0
+declare half @llvm.cos.f16(half %a) #0
+declare half @llvm.pow.f16(half %a, half %b) #0
+declare half @llvm.exp.f16(half %a) #0
+declare half @llvm.exp2.f16(half %a) #0
+declare half @llvm.log.f16(half %a) #0
+declare half @llvm.log10.f16(half %a) #0
+declare half @llvm.log2.f16(half %a) #0
+declare half @llvm.fma.f16(half %a, half %b, half %c) #0
+declare half @llvm.fabs.f16(half %a) #0
+declare half @llvm.minnum.f16(half %a, half %b) #0
+declare half @llvm.maxnum.f16(half %a, half %b) #0
+declare half @llvm.copysign.f16(half %a, half %b) #0
+declare half @llvm.floor.f16(half %a) #0
+declare half @llvm.ceil.f16(half %a) #0
+declare half @llvm.trunc.f16(half %a) #0
+declare half @llvm.rint.f16(half %a) #0
+declare half @llvm.nearbyint.f16(half %a) #0
+declare half @llvm.round.f16(half %a) #0
+declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
+
+; CHECK-LABEL: test_sqrt:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fsqrt s0, s0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_sqrt(half %a) #0 {
+ %r = call half @llvm.sqrt.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_powi:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}__powisf2
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_powi(half %a, i32 %b) #0 {
+ %r = call half @llvm.powi.f16(half %a, i32 %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_sin:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}sinf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_sin(half %a) #0 {
+ %r = call half @llvm.sin.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_cos:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}cosf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_cos(half %a) #0 {
+ %r = call half @llvm.cos.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_pow:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: bl {{_?}}powf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_pow(half %a, half %b) #0 {
+ %r = call half @llvm.pow.f16(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_exp:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}expf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_exp(half %a) #0 {
+ %r = call half @llvm.exp.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_exp2:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}exp2f
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_exp2(half %a) #0 {
+ %r = call half @llvm.exp2.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_log:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}logf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_log(half %a) #0 {
+ %r = call half @llvm.log.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_log10:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}log10f
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_log10(half %a) #0 {
+ %r = call half @llvm.log10.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_log2:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}log2f
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_log2(half %a) #0 {
+ %r = call half @llvm.log2.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_fma:
+; CHECK-NEXT: fcvt s2, h2
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fmadd s0, s0, s1, s2
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fma(half %a, half %b, half %c) #0 {
+ %r = call half @llvm.fma.f16(half %a, half %b, half %c)
+ ret half %r
+}
+
+; CHECK-LABEL: test_fabs:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fabs(half %a) #0 {
+ %r = call half @llvm.fabs.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_minnum:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: bl {{_?}}fminf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_minnum(half %a, half %b) #0 {
+ %r = call half @llvm.minnum.f16(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_maxnum:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: bl {{_?}}fmaxf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_maxnum(half %a, half %b) #0 {
+ %r = call half @llvm.maxnum.f16(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_copysign:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_copysign(half %a, half %b) #0 {
+ %r = call half @llvm.copysign.f16(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_floor:
+; CHECK-NEXT: fcvt s1, h0
+; CHECK-NEXT: frintm s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: frintx s1, s1
+; CHECK-NEXT: ret
+define half @test_floor(half %a) #0 {
+ %r = call half @llvm.floor.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_ceil:
+; CHECK-NEXT: fcvt s1, h0
+; CHECK-NEXT: frintp s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: frintx s1, s1
+; CHECK-NEXT: ret
+define half @test_ceil(half %a) #0 {
+ %r = call half @llvm.ceil.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_trunc:
+; CHECK-NEXT: fcvt s1, h0
+; CHECK-NEXT: frintz s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: frintx s1, s1
+; CHECK-NEXT: ret
+define half @test_trunc(half %a) #0 {
+ %r = call half @llvm.trunc.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_rint:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: frintx s0, s0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_rint(half %a) #0 {
+ %r = call half @llvm.rint.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_nearbyint:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: frinti s0, s0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_nearbyint(half %a) #0 {
+ %r = call half @llvm.nearbyint.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_round:
+; CHECK-NEXT: fcvt s1, h0
+; CHECK-NEXT: frinta s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: frintx s1, s1
+; CHECK-NEXT: ret
+define half @test_round(half %a) #0 {
+ %r = call half @llvm.round.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_fmuladd:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fmul s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvt s1, h2
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fmuladd(half %a, half %b, half %c) #0 {
+ %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
+ ret half %r
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fast-isel-int-ext5.ll b/test/CodeGen/AArch64/fast-isel-int-ext5.ll
new file mode 100644
index 0000000..0f9ec62
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-int-ext5.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: int_ext_opt
+define i64 @int_ext_opt(i8* %addr, i1 %c1, i1 %c2) {
+entry:
+ %0 = load i8, i8* %addr
+ br i1 %c1, label %bb1, label %bb2
+
+bb1:
+ %1 = zext i8 %0 to i64
+ br i1 %c2, label %bb2, label %exit
+
+bb2:
+ %2 = phi i64 [1, %entry], [%1, %bb1]
+ ret i64 %2
+
+exit:
+ ret i64 0
+}
diff --git a/test/CodeGen/AArch64/fold-constants.ll b/test/CodeGen/AArch64/fold-constants.ll
new file mode 100644
index 0000000..2dd0d12
--- /dev/null
+++ b/test/CodeGen/AArch64/fold-constants.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+define i64 @dotests_616() {
+; CHECK-LABEL: dotests_616
+; CHECK: movi d0, #0000000000000000
+; CHECK-NEXT: umov w8, v0.b[2]
+; CHECK-NEXT: sbfx w8, w8, #0, #1
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast <2 x i64> zeroinitializer to <8 x i16>
+ %1 = and <8 x i16> zeroinitializer, %0
+ %2 = icmp ne <8 x i16> %1, zeroinitializer
+ %3 = extractelement <8 x i1> %2, i32 2
+ %vgetq_lane285 = sext i1 %3 to i16
+ %vset_lane = insertelement <4 x i16> undef, i16 %vgetq_lane285, i32 0
+ %4 = bitcast <4 x i16> %vset_lane to <1 x i64>
+ %vget_lane = extractelement <1 x i64> %4, i32 0
+ ret i64 %vget_lane
+}
diff --git a/test/CodeGen/AArch64/fp16-instructions.ll b/test/CodeGen/AArch64/fp16-instructions.ll
deleted file mode 100644
index ba96694..0000000
--- a/test/CodeGen/AArch64/fp16-instructions.ll
+++ /dev/null
@@ -1,109 +0,0 @@
-; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
-
-define half @add_h(half %a, half %b) {
-entry:
-; CHECK-LABEL: add_h:
-; CHECK-DAG: fcvt [[OP1:s[0-9]+]], h0
-; CHECK-DAG: fcvt [[OP2:s[0-9]+]], h1
-; CHECK: fadd [[RES:s[0-9]+]], [[OP1]], [[OP2]]
-; CHECK: fcvt h0, [[RES]]
- %0 = fadd half %a, %b
- ret half %0
-}
-
-
-define half @sub_h(half %a, half %b) {
-entry:
-; CHECK-LABEL: sub_h:
-; CHECK-DAG: fcvt [[OP1:s[0-9]+]], h0
-; CHECK-DAG: fcvt [[OP2:s[0-9]+]], h1
-; CHECK: fsub [[RES:s[0-9]+]], [[OP1]], [[OP2]]
-; CHECK: fcvt h0, [[RES]]
- %0 = fsub half %a, %b
- ret half %0
-}
-
-
-define half @mul_h(half %a, half %b) {
-entry:
-; CHECK-LABEL: mul_h:
-; CHECK-DAG: fcvt [[OP1:s[0-9]+]], h0
-; CHECK-DAG: fcvt [[OP2:s[0-9]+]], h1
-; CHECK: fmul [[RES:s[0-9]+]], [[OP1]], [[OP2]]
-; CHECK: fcvt h0, [[RES]]
- %0 = fmul half %a, %b
- ret half %0
-}
-
-
-define half @div_h(half %a, half %b) {
-entry:
-; CHECK-LABEL: div_h:
-; CHECK-DAG: fcvt [[OP1:s[0-9]+]], h0
-; CHECK-DAG: fcvt [[OP2:s[0-9]+]], h1
-; CHECK: fdiv [[RES:s[0-9]+]], [[OP1]], [[OP2]]
-; CHECK: fcvt h0, [[RES]]
- %0 = fdiv half %a, %b
- ret half %0
-}
-
-
-define half @load_h(half* %a) {
-entry:
-; CHECK-LABEL: load_h:
-; CHECK: ldr h0, [x0]
- %0 = load half, half* %a, align 4
- ret half %0
-}
-
-
-define void @store_h(half* %a, half %b) {
-entry:
-; CHECK-LABEL: store_h:
-; CHECK: str h0, [x0]
- store half %b, half* %a, align 4
- ret void
-}
-
-define half @s_to_h(float %a) {
-; CHECK-LABEL: s_to_h:
-; CHECK: fcvt h0, s0
- %1 = fptrunc float %a to half
- ret half %1
-}
-
-define half @d_to_h(double %a) {
-; CHECK-LABEL: d_to_h:
-; CHECK: fcvt h0, d0
- %1 = fptrunc double %a to half
- ret half %1
-}
-
-define float @h_to_s(half %a) {
-; CHECK-LABEL: h_to_s:
-; CHECK: fcvt s0, h0
- %1 = fpext half %a to float
- ret float %1
-}
-
-define double @h_to_d(half %a) {
-; CHECK-LABEL: h_to_d:
-; CHECK: fcvt d0, h0
- %1 = fpext half %a to double
- ret double %1
-}
-
-define half @bitcast_i_to_h(i16 %a) {
-; CHECK-LABEL: bitcast_i_to_h:
-; CHECK: fmov s0, w0
- %1 = bitcast i16 %a to half
- ret half %1
-}
-
-
-define i16 @bitcast_h_to_i(half %a) {
-; CHECK-LABEL: bitcast_h_to_i:
-; CHECK: fmov w0, s0
- %1 = bitcast half %a to i16
- ret i16 %1
-}
diff --git a/test/CodeGen/AArch64/global-merge-1.ll b/test/CodeGen/AArch64/global-merge-1.ll
index b404389..14b0430 100644
--- a/test/CodeGen/AArch64/global-merge-1.ll
+++ b/test/CodeGen/AArch64/global-merge-1.ll
@@ -1,11 +1,11 @@
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -O3 -enable-global-merge -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -O3 -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -O3 -enable-global-merge -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -O3 -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-apple-ios -O3 -enable-global-merge -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
-; RUN: llc %s -mtriple=aarch64-apple-ios -O3 -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
@m = internal global i32 0, align 4
@n = internal global i32 0, align 4
diff --git a/test/CodeGen/AArch64/global-merge-2.ll b/test/CodeGen/AArch64/global-merge-2.ll
index d5967b9..af68403 100644
--- a/test/CodeGen/AArch64/global-merge-2.ll
+++ b/test/CodeGen/AArch64/global-merge-2.ll
@@ -1,6 +1,6 @@
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -O3 -enable-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -O3 -enable-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-apple-ios -O3 -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
@x = global i32 0, align 4
@y = global i32 0, align 4
diff --git a/test/CodeGen/AArch64/global-merge-3.ll b/test/CodeGen/AArch64/global-merge-3.ll
index 15035c0..9251083 100644
--- a/test/CodeGen/AArch64/global-merge-3.ll
+++ b/test/CodeGen/AArch64/global-merge-3.ll
@@ -1,6 +1,6 @@
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -O3 -enable-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -O3 -enable-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-apple-ios -O3 -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
@x = global [1000 x i32] zeroinitializer, align 1
@y = global [1000 x i32] zeroinitializer, align 1
diff --git a/test/CodeGen/AArch64/global-merge-4.ll b/test/CodeGen/AArch64/global-merge-4.ll
index 8fb7747..bc6b68a 100644
--- a/test/CodeGen/AArch64/global-merge-4.ll
+++ b/test/CodeGen/AArch64/global-merge-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -O3 -enable-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -o - | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
target triple = "arm64-apple-ios7.0.0"
diff --git a/test/CodeGen/AArch64/merge-store.ll b/test/CodeGen/AArch64/merge-store.ll
new file mode 100644
index 0000000..18dbad4
--- /dev/null
+++ b/test/CodeGen/AArch64/merge-store.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march aarch64 %s -o - | FileCheck %s
+
+@g0 = external global <3 x float>, align 16
+@g1 = external global <3 x float>, align 4
+
+; CHECK: ldr s[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]]{{\]}}, #4
+; CHECK: ld1{{\.?s?}} { v[[R0]]{{\.?s?}} }[1], {{\[}}[[R1]]{{\]}}
+; CHECK: str d[[R0]]
+
+define void @blam() {
+ %tmp4 = getelementptr inbounds <3 x float>, <3 x float>* @g1, i64 0, i64 0
+ %tmp5 = load <3 x float>, <3 x float>* @g0, align 16
+ %tmp6 = extractelement <3 x float> %tmp5, i64 0
+ store float %tmp6, float* %tmp4
+ %tmp7 = getelementptr inbounds float, float* %tmp4, i64 1
+ %tmp8 = load <3 x float>, <3 x float>* @g0, align 16
+ %tmp9 = extractelement <3 x float> %tmp8, i64 1
+ store float %tmp9, float* %tmp7
+ ret void;
+}
diff --git a/test/CodeGen/AArch64/print-mrs-system-register.ll b/test/CodeGen/AArch64/print-mrs-system-register.ll
new file mode 100644
index 0000000..3411ed6
--- /dev/null
+++ b/test/CodeGen/AArch64/print-mrs-system-register.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple=arm64-apple-darwin %s -o - | FileCheck %s
+
+; CHECK: mrs x0, CPM_IOACC_CTL_EL3
+
+define void @foo1() #0 {
+entry:
+ tail call void asm sideeffect "mrs x0, cpm_ioacc_ctl_el3", ""()
+ ret void
+}
+
+attributes #0 = { "target-cpu"="cyclone" }
diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll
index 34d45d8..a68fdec 100644
--- a/test/CodeGen/AArch64/sibling-call.ll
+++ b/test/CodeGen/AArch64/sibling-call.ll
@@ -75,8 +75,8 @@ define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
; CHECK: ldr [[VAL0:x[0-9]+]],
; CHECK: ldr [[VAL1:x[0-9]+]],
-; CHECK: str [[VAL1]],
; CHECK: str [[VAL0]],
+; CHECK: str [[VAL1]],
; CHECK-NOT: add sp, sp,
; CHECK: b callee_stack16
diff --git a/test/CodeGen/AArch64/stackmap-liveness.ll b/test/CodeGen/AArch64/stackmap-liveness.ll
new file mode 100644
index 0000000..6b37aac
--- /dev/null
+++ b/test/CodeGen/AArch64/stackmap-liveness.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=aarch64-apple-darwin | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps
+; CHECK-NEXT: __LLVM_StackMaps:
+; Header
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 0
+; Num Functions
+; CHECK-NEXT: .long 1
+; Num LargeConstants
+; CHECK-NEXT: .long 0
+; Num Callsites
+; CHECK-NEXT: .long 1
+
+; Functions and stack size
+; CHECK-NEXT: .quad _stackmap_liveness
+; CHECK-NEXT: .quad 16
+
+; Test that the return register is recognized as a live-out.
+define i64 @stackmap_liveness(i1 %c) {
+; CHECK-LABEL: .long L{{.*}}-_stackmap_liveness
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .short 0
+; Padding
+; CHECK-NEXT: .short 0
+; Num LiveOut Entries: 2
+; CHECK-NEXT: .short 2
+; LiveOut Entry 0: X0
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; LiveOut Entry 1: SP
+; CHECK-NEXT: .short 31
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; Align
+; CHECK-NEXT: .align 3
+ %1 = select i1 %c, i64 1, i64 2
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 32, i8* null, i32 0)
+ ret i64 %1
+}
+
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+
diff --git a/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
new file mode 100644
index 0000000..4d80f2a
--- /dev/null
+++ b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
@@ -0,0 +1,106 @@
+; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s
+; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks.
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Check that we don't try to tail-call with a non-forwarded sret parameter.
+declare void @test_explicit_sret(i1024* sret) #0
+
+; This is the only OK case, where we forward the explicit sret pointer.
+
+; CHECK-LABEL: _test_tailcall_explicit_sret:
+; CHECK-NEXT: b _test_explicit_sret
+define void @test_tailcall_explicit_sret(i1024* sret %arg) #0 {
+ tail call void @test_explicit_sret(i1024* %arg)
+ ret void
+}
+
+; CHECK-LABEL: _test_call_explicit_sret:
+; CHECK-NOT: mov x8
+; CHECK: bl _test_explicit_sret
+; CHECK: ret
+define void @test_call_explicit_sret(i1024* sret %arg) #0 {
+ call void @test_explicit_sret(i1024* %arg)
+ ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_unused:
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK: ret
+define void @test_tailcall_explicit_sret_alloca_unused() #0 {
+ %l = alloca i1024, align 8
+ tail call void @test_explicit_sret(i1024* %l)
+ ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_dummyusers:
+; CHECK: ldr [[PTRLOAD1:x[0-9]+]], [x0]
+; CHECK: str [[PTRLOAD1]], [sp]
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK: ret
+define void @test_tailcall_explicit_sret_alloca_dummyusers(i1024* %ptr) #0 {
+ %l = alloca i1024, align 8
+ %r = load i1024, i1024* %ptr, align 8
+ store i1024 %r, i1024* %l, align 8
+ tail call void @test_explicit_sret(i1024* %l)
+ ret void
+}
+
+; This is too conservative, but doesn't really happen in practice.
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_gep:
+; CHECK: add x8, x0, #128
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK: ret
+define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 {
+ %ptr2 = getelementptr i1024, i1024* %ptr, i32 1
+ tail call void @test_explicit_sret(i1024* %ptr2)
+ ret void
+}
+
+; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_returned:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_explicit_sret
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
+ %l = alloca i1024, align 8
+ tail call void @test_explicit_sret(i1024* %l)
+ %r = load i1024, i1024* %l, align 8
+ ret i1024 %r
+}
+
+; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_nosret_arg:
+; CHECK-DAG: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK-DAG: mov [[FPTR:x[0-9]+]], x0
+; CHECK: mov x0, sp
+; CHECK-NEXT: blr [[FPTR]]
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, void (i1024*)* %f) #0 {
+ %l = alloca i1024, align 8
+ tail call void %f(i1024* %l)
+ %r = load i1024, i1024* %l, align 8
+ store i1024 %r, i1024* %arg, align 8
+ ret void
+}
+
+; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: blr x0
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define void @test_indirect_tailcall_explicit_sret_(i1024* sret %arg, i1024 ()* %f) #0 {
+ %ret = tail call i1024 %f()
+ store i1024 %ret, i1024* %arg, align 8
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/tailcall-implicit-sret.ll b/test/CodeGen/AArch64/tailcall-implicit-sret.ll
new file mode 100644
index 0000000..5d68059
--- /dev/null
+++ b/test/CodeGen/AArch64/tailcall-implicit-sret.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s
+; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks.
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Check that we don't try to tail-call with an sret-demoted return.
+
+declare i1024 @test_sret() #0
+
+; CHECK-LABEL: _test_call_sret:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_sret
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_call_sret() #0 {
+ %a = call i1024 @test_sret()
+ ret i1024 %a
+}
+
+; CHECK-LABEL: _test_tailcall_sret:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: bl _test_sret
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_tailcall_sret() #0 {
+ %a = tail call i1024 @test_sret()
+ ret i1024 %a
+}
+
+; CHECK-LABEL: _test_indirect_tailcall_sret:
+; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8
+; CHECK: mov x8, sp
+; CHECK-NEXT: blr x0
+; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ret
+define i1024 @test_indirect_tailcall_sret(i1024 ()* %f) #0 {
+ %a = tail call i1024 %f()
+ ret i1024 %a
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll b/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll
new file mode 100644
index 0000000..b970fb1
--- /dev/null
+++ b/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
+
+; CHECK-LABEL: tail_memcpy:
+; CHECK: b memcpy
+define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memmove:
+; CHECK: b memmove
+define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memset:
+; CHECK: b memset
+define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/vcvt-oversize.ll b/test/CodeGen/AArch64/vcvt-oversize.ll
new file mode 100644
index 0000000..066a4b6
--- /dev/null
+++ b/test/CodeGen/AArch64/vcvt-oversize.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define <8 x i8> @float_to_i8(<8 x float>* %in) {
+; CHECK-LABEL: float_to_i8:
+; CHECK-DAG: fadd v[[LSB:[0-9]+]].4s, v0.4s, v0.4s
+; CHECK-DAG: fadd v[[MSB:[0-9]+]].4s, v1.4s, v1.4s
+; CHECK-DAG: fcvtzu v[[LSB2:[0-9]+]].4s, v[[LSB]].4s
+; CHECK-DAG: fcvtzu v[[MSB2:[0-9]+]].4s, v[[MSB]].4s
+; CHECK-DAG: xtn v[[TMP:[0-9]+]].4h, v[[LSB]].4s
+; CHECK-DAG: xtn2 v[[TMP]].8h, v[[MSB]].4s
+; CHECK-DAG: xtn v0.8b, v[[TMP]].8h
+ %l = load <8 x float>, <8 x float>* %in
+ %scale = fmul <8 x float> %l, <float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>
+ %conv = fptoui <8 x float> %scale to <8 x i8>
+ ret <8 x i8> %conv
+}
diff --git a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
index 0162d7f..7c42596 100644
--- a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
+++ b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
@@ -83,7 +83,7 @@ cond_next881: ; preds = %bb866
%tmp884885 = inttoptr i64 %tmp10959 to %struct.tree_identifier* ; <%struct.tree_identifier*> [#uses=1]
%tmp887 = getelementptr %struct.tree_identifier, %struct.tree_identifier* %tmp884885, i32 0, i32 1, i32 0 ; <i8**> [#uses=1]
%tmp888 = load i8*, i8** %tmp887 ; <i8*> [#uses=1]
- tail call void (i32, ...)* @error( i32 undef, i8* %tmp888 )
+ tail call void (i32, ...) @error( i32 undef, i8* %tmp888 )
ret void
cond_true918: ; preds = %cond_false841
diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
index cf5094f..87863bd 100644
--- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll
+++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
@@ -5,7 +5,7 @@ entry:
%A = alloca [1123 x i32], align 16 ; <[1123 x i32]*> [#uses=1]
%B = alloca [3123 x i32], align 16 ; <[3123 x i32]*> [#uses=1]
%C = alloca [12312 x i32], align 16 ; <[12312 x i32]*> [#uses=1]
- %tmp = call i32 (...)* @bar( [3123 x i32]* %B, [1123 x i32]* %A, [12312 x i32]* %C ) ; <i32> [#uses=0]
+ %tmp = call i32 (...) @bar( [3123 x i32]* %B, [1123 x i32]* %A, [12312 x i32]* %C ) ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
index b687029..11f3003 100644
--- a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
+++ b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
@@ -10,7 +10,7 @@ define internal void @_ZN1B1iEv(%struct.B* %this) {
entry:
%tmp1 = getelementptr %struct.B, %struct.B* %this, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32, i32* %tmp1 ; <i32> [#uses=1]
- %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8], [7 x i8]* @str, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0]
+ %tmp4 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([7 x i8], [7 x i8]* @str, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0]
ret void
}
@@ -20,7 +20,7 @@ define internal void @_ZN1B1jEv(%struct.B* %this) {
entry:
%tmp1 = getelementptr %struct.B, %struct.B* %this, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32, i32* %tmp1 ; <i32> [#uses=1]
- %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8], [7 x i8]* @str1, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0]
+ %tmp4 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([7 x i8], [7 x i8]* @str1, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
index ca168b6..50573b4 100644
--- a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
+++ b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
@@ -93,7 +93,7 @@ cond_true1272: ; preds = %cond_next1267
%tmp42.i348 = sub i32 0, %tmp2930.i ; <i32> [#uses=1]
%tmp45.i = getelementptr %struct.TestObj, %struct.TestObj* %tmp1273, i32 0, i32 0 ; <i8**> [#uses=2]
%tmp48.i = load i8*, i8** %tmp45.i ; <i8*> [#uses=1]
- %tmp50.i350 = call i32 (i8*, i8*, ...)* @sprintf( i8* getelementptr ([256 x i8], [256 x i8]* @Msg, i32 0, i32 0), i8* getelementptr ([48 x i8], [48 x i8]* @.str53615, i32 0, i32 0), i8* null, i8** %tmp45.i, i8* %tmp48.i ) ; <i32> [#uses=0]
+ %tmp50.i350 = call i32 (i8*, i8*, ...) @sprintf( i8* getelementptr ([256 x i8], [256 x i8]* @Msg, i32 0, i32 0), i8* getelementptr ([48 x i8], [48 x i8]* @.str53615, i32 0, i32 0), i8* null, i8** %tmp45.i, i8* %tmp48.i ) ; <i32> [#uses=0]
br i1 false, label %cond_true.i632.i, label %Ut_TraceMsg.exit648.i
cond_true.i632.i: ; preds = %cond_true1272
diff --git a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
index 5895a32..f49c805 100644
--- a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
+++ b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
@@ -24,13 +24,13 @@ entry:
br i1 %toBool, label %cond_true, label %cond_false
cond_true: ; preds = %entry
- %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp3 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
br label %cond_next
cond_false: ; preds = %entry
- %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp5 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
br label %cond_next
cond_next: ; preds = %cond_false, %cond_true
@@ -41,17 +41,17 @@ cond_next: ; preds = %cond_false, %cond_true
br i1 %toBool10, label %cond_true11, label %cond_false15
cond_true11: ; preds = %cond_next
- %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_false15: ; preds = %cond_next
- %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp16 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_next18: ; preds = %cond_false15, %cond_true11
- %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp19 = call i32 (...) @bar( ) ; <i32> [#uses=0]
br label %return
return: ; preds = %cond_next18
diff --git a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
index abb6a33f..421d501 100644
--- a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
+++ b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
@@ -26,8 +26,8 @@ entry:
br i1 %toBool, label %cond_true, label %cond_false
cond_true: ; preds = %entry
- %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp3 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
%tmp7 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
%tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
@@ -35,8 +35,8 @@ cond_true: ; preds = %entry
br i1 %toBool10, label %cond_true11, label %cond_false15
cond_false: ; preds = %entry
- %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp5 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
%tmp27 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1]
%tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1]
@@ -44,17 +44,17 @@ cond_false: ; preds = %entry
br i1 %toBool210, label %cond_true11, label %cond_false15
cond_true11: ; preds = %cond_next
- %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_false15: ; preds = %cond_next
- %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp16 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_next18: ; preds = %cond_false15, %cond_true11
- %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp19 = call i32 (...) @bar( ) ; <i32> [#uses=0]
br label %return
return: ; preds = %cond_next18
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
index 1edaefb..52cc37e 100644
--- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -36,8 +36,8 @@ entry:
br i1 %toBool, label %cond_true, label %cond_false
cond_true: ; preds = %entry
- %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp3 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
%tmp7 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
%tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
@@ -45,8 +45,8 @@ cond_true: ; preds = %entry
br i1 %toBool10, label %cond_true11, label %cond_false15
cond_false: ; preds = %entry
- %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp5 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
%tmp27 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1]
%tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1]
@@ -54,17 +54,17 @@ cond_false: ; preds = %entry
br i1 %toBool210, label %cond_true11, label %cond_false15
cond_true11: ; preds = %cond_next
- %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_false15: ; preds = %cond_next
- %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp16 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_next18: ; preds = %cond_false15, %cond_true11
- %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp19 = call i32 (...) @bar( ) ; <i32> [#uses=0]
br label %return
return: ; preds = %cond_next18
diff --git a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 5ee8b46..753f9e3 100644
--- a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -13,7 +13,7 @@ bb88.i: ; preds = %bb74.i
mandel.exit: ; preds = %bb88.i
%tmp2 = load volatile double, double* getelementptr ({ double, double }, { double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1]
%tmp23 = fptosi double %tmp2 to i32 ; <i32> [#uses=1]
- %tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 ) ; <i32> [#uses=0]
+ %tmp5 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
index bd1f174..1ededa3 100644
--- a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
@@ -73,10 +73,10 @@ bb609.i.i: ; preds = %cond_next602.i.i
br label %bb620.i.i
bb620.i.i: ; preds = %bb620.i.i, %bb609.i.i
%indvar166.i465.i = phi i32 [ %indvar.next167.i.i, %bb620.i.i ], [ 0, %bb609.i.i ] ; <i32> [#uses=1]
- %tmp640.i.i = call i32 (%struct.FILE*, i8*, ...)* @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8], [5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0]
+ %tmp640.i.i = call i32 (%struct.FILE*, i8*, ...) @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8], [5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0]
%tmp648.i.i = load i32, i32* null, align 4 ; <i32> [#uses=1]
%tmp650.i468.i = icmp sgt i32 0, %tmp648.i.i ; <i1> [#uses=1]
- %tmp624.i469.i = call i32 (%struct.FILE*, i8*, ...)* @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8], [5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0]
+ %tmp624.i469.i = call i32 (%struct.FILE*, i8*, ...) @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8], [5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0]
%indvar.next167.i.i = add i32 %indvar166.i465.i, 1 ; <i32> [#uses=1]
br i1 %tmp650.i468.i, label %bb653.i.i.loopexit, label %bb620.i.i
bb653.i.i.loopexit: ; preds = %bb620.i.i
diff --git a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
index d090da0..cad5440 100644
--- a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
@@ -81,7 +81,7 @@ bb244: ; preds = %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122
br i1 %0, label %bb435, label %bb433
bb394: ; preds = %bb122
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 3, i8* getelementptr ([23 x i8], [23 x i8]* @"\01LC13423", i32 0, i32 0), i32 0, %struct.FILE_POS* @no_file_pos, i8* getelementptr ([13 x i8], [13 x i8]* @"\01LC18972", i32 0, i32 0), i8* null) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 1, i32 3, i8* getelementptr ([23 x i8], [23 x i8]* @"\01LC13423", i32 0, i32 0), i32 0, %struct.FILE_POS* @no_file_pos, i8* getelementptr ([13 x i8], [13 x i8]* @"\01LC18972", i32 0, i32 0), i8* null) nounwind
br label %bb396
bb396: ; preds = %bb394, %bb131, %bb122, %bb122, %bb122, %bb122, %RESUME
diff --git a/test/CodeGen/ARM/2009-04-08-FREM.ll b/test/CodeGen/ARM/2009-04-08-FREM.ll
index 606c6b1..e0f9485 100644
--- a/test/CodeGen/ARM/2009-04-08-FREM.ll
+++ b/test/CodeGen/ARM/2009-04-08-FREM.ll
@@ -4,6 +4,6 @@ declare i32 @printf(i8*, ...)
define i32 @main() {
%rem_r = frem double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
- %1 = call i32 (i8*, ...)* @printf(i8* null, double %rem_r) ; <i32> [#uses=0]
+ %1 = call i32 (i8*, ...) @printf(i8* null, double %rem_r) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
index 887fb0b..ac641f9 100644
--- a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
@@ -5,7 +5,7 @@
define i16 @fn16(i16 %arg0.0, <2 x i16> %arg1, i16 %arg2.0) nounwind {
entry:
store <2 x i16> %arg1, <2 x i16>* null
- %0 = call i32 (i8*, ...)* @printf(i8* getelementptr ([30 x i8], [30 x i8]* @.str, i32 0, i32 0), i32 0) nounwind ; <i32> [#uses=0]
+ %0 = call i32 (i8*, ...) @printf(i8* getelementptr ([30 x i8], [30 x i8]* @.str, i32 0, i32 0), i32 0) nounwind ; <i32> [#uses=0]
ret i16 0
}
diff --git a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
index b616cb3..ae005db 100644
--- a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
+++ b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
@@ -19,7 +19,7 @@ bb1: ; preds = %bb
bb3: ; preds = %bb1, %bb
%iftmp.0.0 = phi i32 [ 0, %bb1 ], [ -1, %bb ] ; <i32> [#uses=1]
- %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8], [7 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 %iftmp.0.0) nounwind ; <i32> [#uses=0]
+ %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([7 x i8], [7 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 %iftmp.0.0) nounwind ; <i32> [#uses=0]
%2 = load %struct.List*, %struct.List** null, align 4 ; <%struct.List*> [#uses=2]
%phitmp = icmp eq %struct.List* %2, null ; <i1> [#uses=1]
br i1 %phitmp, label %bb5, label %bb
diff --git a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
index 0612d51..7bbb809 100644
--- a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
+++ b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
@@ -57,6 +57,6 @@ Fft.exit.i: ; preds = %bb7.i.i
br i1 undef, label %bb5.i, label %bb1.outer2.i.i.outer
bb5.i: ; preds = %Fft.exit.i
- %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([15 x i8], [15 x i8]* @"\01LC", i32 0, i32 0), double undef, double undef) nounwind ; <i32> [#uses=0]
+ %0 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([15 x i8], [15 x i8]* @"\01LC", i32 0, i32 0), double undef, double undef) nounwind ; <i32> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
index 72a41f9..e9c4b03 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
@@ -47,14 +47,14 @@ bb11: ; preds = %bb9
tail call void @diff(i8* undef, i8* %3, i32 undef, i32 undef, i32 undef, i32 undef) nounwind
%4 = sitofp i32 undef to double ; <double> [#uses=1]
%5 = fdiv double %4, 1.000000e+01 ; <double> [#uses=1]
- %6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8], [29 x i8]* @"\01LC12", i32 0, i32 0), double %5) nounwind ; <i32> [#uses=0]
+ %6 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([29 x i8], [29 x i8]* @"\01LC12", i32 0, i32 0), double %5) nounwind ; <i32> [#uses=0]
%7 = load i32, i32* @al_len, align 4 ; <i32> [#uses=1]
%8 = load i32, i32* @no_mat, align 4 ; <i32> [#uses=1]
%9 = load i32, i32* @no_mis, align 4 ; <i32> [#uses=1]
%10 = sub i32 %7, %8 ; <i32> [#uses=1]
%11 = sub i32 %10, %9 ; <i32> [#uses=1]
- %12 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8], [33 x i8]* @"\01LC16", i32 0, i32 0), i32 %11) nounwind ; <i32> [#uses=0]
- %13 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8], [47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 undef) nounwind ; <i32> [#uses=0]
+ %12 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([33 x i8], [33 x i8]* @"\01LC16", i32 0, i32 0), i32 %11) nounwind ; <i32> [#uses=0]
+ %13 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([47 x i8], [47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 undef) nounwind ; <i32> [#uses=0]
br i1 undef, label %bb15, label %bb12
bb12: ; preds = %bb11
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
index 92b1869..08291e6 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
@@ -42,10 +42,10 @@ bb11: ; preds = %bb9
store i32 0, i32* @no_mis, align 4
%4 = getelementptr i8, i8* %B, i32 %0 ; <i8*> [#uses=1]
tail call void @diff(i8* undef, i8* %4, i32 undef, i32 %3, i32 undef, i32 undef) nounwind
- %5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8], [33 x i8]* @"\01LC11", i32 0, i32 0), i32 %tmp13) nounwind ; <i32> [#uses=0]
+ %5 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([33 x i8], [33 x i8]* @"\01LC11", i32 0, i32 0), i32 %tmp13) nounwind ; <i32> [#uses=0]
%6 = load i32, i32* @no_mis, align 4 ; <i32> [#uses=1]
- %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8], [33 x i8]* @"\01LC15", i32 0, i32 0), i32 %6) nounwind ; <i32> [#uses=0]
- %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8], [47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 %2) nounwind ; <i32> [#uses=0]
+ %7 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([33 x i8], [33 x i8]* @"\01LC15", i32 0, i32 0), i32 %6) nounwind ; <i32> [#uses=0]
+ %8 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([47 x i8], [47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 %2) nounwind ; <i32> [#uses=0]
br i1 undef, label %bb15, label %bb12
bb12: ; preds = %bb11
diff --git a/test/CodeGen/ARM/2009-10-16-Scope.ll b/test/CodeGen/ARM/2009-10-16-Scope.ll
index 9caa785..b2b3bbe 100644
--- a/test/CodeGen/ARM/2009-10-16-Scope.ll
+++ b/test/CodeGen/ARM/2009-10-16-Scope.ll
@@ -9,7 +9,7 @@ entry:
br label %do.body, !dbg !0
do.body: ; preds = %entry
- call void @llvm.dbg.declare(metadata i32* %count_, metadata !4, metadata !MDExpression())
+ call void @llvm.dbg.declare(metadata i32* %count_, metadata !4, metadata !MDExpression()), !dbg !MDLocation(scope: !5)
%conv = ptrtoint i32* %count_ to i32, !dbg !0 ; <i32> [#uses=1]
%call = call i32 @foo(i32 %conv) ssp, !dbg !0 ; <i32> [#uses=0]
br label %do.end, !dbg !0
diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll
index b43f2a6..39f3292 100644
--- a/test/CodeGen/ARM/2009-10-27-double-align.ll
+++ b/test/CodeGen/ARM/2009-10-27-double-align.ll
@@ -8,7 +8,7 @@ entry:
;CHECK: [sp, #8]
;CHECK: [sp, #12]
;CHECK: [sp]
- tail call void (i8*, ...)* @f(i8* getelementptr ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00)
+ tail call void (i8*, ...) @f(i8* getelementptr ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00)
ret void
}
diff --git a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
index 8df3aaf..312cccd 100644
--- a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
+++ b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
@@ -5,7 +5,7 @@ target triple = "armv4t-apple-darwin10"
define hidden i32 @__addvsi3(i32 %a, i32 %b) nounwind {
entry:
- tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !0, metadata !MDExpression())
+ tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !0, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
%0 = add nsw i32 %b, %a, !dbg !9 ; <i32> [#uses=1]
ret i32 %0, !dbg !11
}
@@ -27,6 +27,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!9 = !MDLocation(line: 95, scope: !10)
!10 = distinct !MDLexicalBlock(line: 94, column: 0, file: !12, scope: !1)
!11 = !MDLocation(line: 100, scope: !10)
-!13 = !{i32 0}
+!13 = !{}
!14 = !{!1}
!15 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll b/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
index 89ad5f5..deb5884 100644
--- a/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
+++ b/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
@@ -19,12 +19,12 @@ entry:
%tmp21 = load i32, i32* undef ; <i32> [#uses=1]
%0 = mul i32 1, %tmp21 ; <i32> [#uses=1]
%vla22 = alloca i8, i32 %0, align 1 ; <i8*> [#uses=1]
- call void (...)* @zz(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 2, i32 1)
+ call void (...) @zz(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 2, i32 1)
br i1 undef, label %if.then, label %if.end36
if.then: ; preds = %entry
- %call = call i32 (...)* @x(%struct.q* undef, i8* undef, i8* %vla6, i8* %vla10, i32 undef) ; <i32> [#uses=0]
- %call35 = call i32 (...)* @x(%struct.q* undef, i8* %vla14, i8* %vla18, i8* %vla22, i32 undef) ; <i32> [#uses=0]
+ %call = call i32 (...) @x(%struct.q* undef, i8* undef, i8* %vla6, i8* %vla10, i32 undef) ; <i32> [#uses=0]
+ %call35 = call i32 (...) @x(%struct.q* undef, i8* %vla14, i8* %vla18, i8* %vla22, i32 undef) ; <i32> [#uses=0]
unreachable
if.end36: ; preds = %entry
diff --git a/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll b/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
index 9cd61d3..6f55ac0 100644
--- a/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
@@ -13,7 +13,7 @@
define void @TW_oldinput(%struct.FILE* nocapture %fp) nounwind {
entry:
%xcenter = alloca i32, align 4 ; <i32*> [#uses=2]
- %0 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; <i32> [#uses=1]
+ %0 = call i32 (%struct.FILE*, i8*, ...) @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 4 ; <i1> [#uses=1]
br i1 %1, label %bb, label %return
@@ -137,7 +137,7 @@ bb322: ; preds = %bb248
br i1 undef, label %bb248, label %bb445
bb445: ; preds = %bb322, %bb10, %bb
- %49 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; <i32> [#uses=1]
+ %49 = call i32 (%struct.FILE*, i8*, ...) @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; <i32> [#uses=1]
%50 = icmp eq i32 %49, 4 ; <i1> [#uses=1]
br i1 %50, label %bb, label %return
diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
index 4c5d8d9..b02efea 100644
--- a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
+++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
@@ -31,7 +31,7 @@ define internal void @_ZN1AD1Ev(%struct.A* nocapture %this) nounwind ssp align 2
entry:
%tmp.i = getelementptr inbounds %struct.A, %struct.A* %this, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2.i = load i32, i32* %tmp.i ; <i32> [#uses=1]
- %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str4, i32 0, i32 0), i32 %tmp2.i) nounwind ; <i32> [#uses=0]
+ %call.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str4, i32 0, i32 0), i32 %tmp2.i) nounwind ; <i32> [#uses=0]
%tmp3.i = load i32, i32* @d ; <i32> [#uses=1]
%inc.i = add nsw i32 %tmp3.i, 1 ; <i32> [#uses=1]
store i32 %inc.i, i32* @d
@@ -46,7 +46,7 @@ entry:
%exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind ; <i8*> [#uses=2]
%tmp2.i.i.i = bitcast i8* %exception.i to i32* ; <i32*> [#uses=1]
store i32 1, i32* %tmp2.i.i.i
- %call.i.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str5, i32 0, i32 0), i32 1) nounwind ; <i32> [#uses=0]
+ %call.i.i.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str5, i32 0, i32 0), i32 1) nounwind ; <i32> [#uses=0]
invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (%0* @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1AD1Ev to i8*)) noreturn
to label %.noexc unwind label %lpad
@@ -55,16 +55,16 @@ entry:
try.cont: ; preds = %lpad
%0 = tail call i8* @__cxa_get_exception_ptr(i8* %exn) nounwind ; <i8*> [#uses=0]
- %call.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str3, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0]
+ %call.i.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str3, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0]
%1 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind ; <i8*> [#uses=0]
%puts = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @str1, i32 0, i32 0)) ; <i32> [#uses=0]
- %call.i.i3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str4, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0]
+ %call.i.i3 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str4, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0]
%tmp3.i.i = load i32, i32* @d ; <i32> [#uses=1]
%inc.i.i4 = add nsw i32 %tmp3.i.i, 1 ; <i32> [#uses=1]
store i32 %inc.i.i4, i32* @d
tail call void @__cxa_end_catch()
%tmp13 = load i32, i32* @d ; <i32> [#uses=1]
- %call14 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str2, i32 0, i32 0), i32 2, i32 %tmp13) ; <i32> [#uses=0]
+ %call14 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str2, i32 0, i32 0), i32 2, i32 %tmp13) ; <i32> [#uses=0]
%tmp16 = load i32, i32* @d ; <i32> [#uses=1]
%cmp = icmp ne i32 %tmp16, 2 ; <i1> [#uses=1]
%conv = zext i1 %cmp to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
index d3c0fee..cb91890 100644
--- a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
+++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
@@ -15,11 +15,6 @@
; ASM-NEXT: .type _MergedGlobals,%object @ @_MergedGlobals
-; OBJ: Sections [
-; OBJ: Section {
-; OBJ: Index: 4
-; OBJ-NEXT: Name: .bss
-
; OBJ: Symbols [
; OBJ: Symbol {
; OBJ: Name: array00
diff --git a/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll b/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll
index e712e08..f17884e 100644
--- a/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll
+++ b/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll
@@ -12,7 +12,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- tail call void (...)* @g(i32 %a, i32 %b) nounwind
+ tail call void (...) @g(i32 %a, i32 %b) nounwind
br label %if.end
if.end: ; preds = %if.then, %entry
diff --git a/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll b/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll
index 5404cf5..864e291 100644
--- a/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll
+++ b/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll
@@ -12,7 +12,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- tail call void (...)* @h(i32 %a, i32 %b) nounwind
+ tail call void (...) @h(i32 %a, i32 %b) nounwind
br label %if.end
if.end: ; preds = %if.then, %entry
@@ -31,7 +31,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- tail call void (...)* @h(i32 %a, i32 %b) nounwind
+ tail call void (...) @h(i32 %a, i32 %b) nounwind
br label %if.end
if.end: ; preds = %if.then, %entry
diff --git a/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll b/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
index 9f2fa63..86596d6 100644
--- a/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
+++ b/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
@@ -4,7 +4,7 @@ target triple = "armv6-none-linux-gnueabi"
define void @sample_test(i8* %.T0348, i16* nocapture %sourceA, i16* nocapture %destValues) {
L.entry:
- %0 = call i32 (...)* @get_index(i8* %.T0348, i32 0)
+ %0 = call i32 (...) @get_index(i8* %.T0348, i32 0)
%1 = bitcast i16* %destValues to i8*
%2 = mul i32 %0, 6
%3 = getelementptr i8, i8* %1, i32 %2
diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
index b64b1bf..4a1341c 100644
--- a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
+++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
@@ -33,7 +33,7 @@ entry:
; CHECK: movw r0, #555
define i32 @main() {
entry:
- call void (i32, ...)* @test_byval_8_bytes_alignment(i32 555, %struct_t* byval @static_val)
+ call void (i32, ...) @test_byval_8_bytes_alignment(i32 555, %struct_t* byval @static_val)
ret i32 0
}
@@ -48,7 +48,7 @@ define void @test_byval_8_bytes_alignment_fixed_arg(i32 %n1, %struct_t* byval %v
entry:
%a = getelementptr inbounds %struct_t, %struct_t* %val, i32 0, i32 0
%0 = load double, double* %a
- call void (double)* @f(double %0)
+ call void (double) @f(double %0)
ret void
}
@@ -60,6 +60,6 @@ entry:
; CHECK: movw r0, #555
define i32 @main_fixed_arg() {
entry:
- call void (i32, %struct_t*)* @test_byval_8_bytes_alignment_fixed_arg(i32 555, %struct_t* byval @static_val)
+ call void (i32, %struct_t*) @test_byval_8_bytes_alignment_fixed_arg(i32 555, %struct_t* byval @static_val)
ret i32 0
}
diff --git a/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll b/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
index ef06f59..34af902 100644
--- a/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
+++ b/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
@@ -14,6 +14,6 @@ define void @test_byval_usage_scheduling(i32 %n1, i32 %n2, %struct_t* byval %val
entry:
%a = getelementptr inbounds %struct_t, %struct_t* %val, i32 0, i32 0
%0 = load double, double* %a
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), double %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), double %0)
ret void
}
diff --git a/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
index 427519f..d18dbd2 100644
--- a/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
+++ b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
@@ -14,7 +14,7 @@ define void @printfn(i32 %a, i16 signext %b, double %C, i8 signext %E) {
entry:
%conv = sext i16 %b to i32
%conv1 = sext i8 %E to i32
- %call = tail call i32 (i8*, ...)* @printf(
+ %call = tail call i32 (i8*, ...) @printf(
i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), ; --> R0
i32 %a, ; --> R1
i32 %conv, ; --> R2
diff --git a/test/CodeGen/ARM/2013-10-11-select-stalls.ll b/test/CodeGen/ARM/2013-10-11-select-stalls.ll
index 33c0587..d6045c7 100644
--- a/test/CodeGen/ARM/2013-10-11-select-stalls.ll
+++ b/test/CodeGen/ARM/2013-10-11-select-stalls.ll
@@ -7,7 +7,7 @@ define <16 x i8> @multiselect(i32 %avail, i8* %foo, i8* %bar) {
entry:
%vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %foo, i32 1)
%vld2 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1)
- %and = and i32 %avail, 1
+ %and = and i32 %avail, 3
%tobool = icmp eq i32 %and, 0
%retv = select i1 %tobool, <16 x i8> %vld1, <16 x i8> %vld2
ret <16 x i8> %retv
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index c24d0d2..04ca3e8 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -38,7 +38,7 @@ entry:
%tmp0 = load i32, i32* @bar_i
%tmp2 = call i32 @foo_f()
%tmp3 = add i32 %tmp, %tmp2
- %tmp4 = call %FunTy* @bar_f()
+ %tmp4 = call i32 @bar_f()
%tmp5 = add i32 %tmp3, %tmp4
%tmp6 = add i32 %tmp1, %tmp5
%tmp7 = add i32 %tmp6, %tmp0
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index e7fbf9f..3b1d8dd 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -18,7 +18,7 @@ define i32 @f2() nounwind optsize {
; DARWIN-LABEL: f2:
; DARWIN: mov r3, #128
entry:
- %0 = tail call i32 (i32, ...)* @g2(i32 5, double 1.600000e+01, i32 128) nounwind optsize ; <i32> [#uses=1]
+ %0 = tail call i32 (i32, ...) @g2(i32 5, double 1.600000e+01, i32 128) nounwind optsize ; <i32> [#uses=1]
%not. = icmp ne i32 %0, 128 ; <i1> [#uses=1]
%.0 = zext i1 %not. to i32 ; <i32> [#uses=1]
ret i32 %.0
diff --git a/test/CodeGen/ARM/arm-asm.ll b/test/CodeGen/ARM/arm-asm.ll
index e869abe..f9199ff 100644
--- a/test/CodeGen/ARM/arm-asm.ll
+++ b/test/CodeGen/ARM/arm-asm.ll
@@ -2,6 +2,6 @@
define void @frame_dummy() {
entry:
- %tmp1 = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0,~{dirflag},~{fpsr},~{flags}"( void (i8*)* null ) ; <void (i8*)*> [#uses=0]
+ %tmp1 = tail call void (i8*)* (void (i8*)*) asm "", "=r,0,~{dirflag},~{fpsr},~{flags}"( void (i8*)* null ) ; <void (i8*)*> [#uses=0]
ret void
}
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index 8540833..1982fa9 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -81,6 +81,8 @@
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=+fp-only-sp -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-FAST
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CORTEX-M7-DOUBLE
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r4 | FileCheck %s --check-prefix=CORTEX-R4
+; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r4f | FileCheck %s --check-prefix=CORTEX-R4F
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-R5-FAST
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
@@ -1012,6 +1014,49 @@
; CORTEX-M7-NOFPU-FAST-NOT: .eabi_attribute 22
; CORTEX-M7-NOFPU-FAST: .eabi_attribute 23, 1
+; CORTEX-R4: .cpu cortex-r4
+; CORTEX-R4: .eabi_attribute 6, 10
+; CORTEX-R4: .eabi_attribute 7, 82
+; CORTEX-R4: .eabi_attribute 8, 1
+; CORTEX-R4: .eabi_attribute 9, 2
+; CORTEX-R4-NOT: .fpu vfpv3-d16
+; CORTEX-R4-NOT: .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-R4: .eabi_attribute 20, 1
+; CORTEX-R4: .eabi_attribute 21, 1
+; CORTEX-R4-NOT: .eabi_attribute 22
+; CORTEX-R4: .eabi_attribute 23, 3
+; CORTEX-R4: .eabi_attribute 24, 1
+; CORTEX-R4: .eabi_attribute 25, 1
+; CORTEX-R4-NOT: .eabi_attribute 28
+; CORTEX-R4-NOT: .eabi_attribute 36
+; CORTEX-R4: .eabi_attribute 38, 1
+; CORTEX-R4-NOT: .eabi_attribute 42
+; CORTEX-R4-NOT: .eabi_attribute 44
+; CORTEX-R4-NOT: .eabi_attribute 68
+
+; CORTEX-R4F: .cpu cortex-r4f
+; CORTEX-R4F: .eabi_attribute 6, 10
+; CORTEX-R4F: .eabi_attribute 7, 82
+; CORTEX-R4F: .eabi_attribute 8, 1
+; CORTEX-R4F: .eabi_attribute 9, 2
+; CORTEX-R4F: .fpu vfpv3-d16
+; CORTEX-R4F-NOT: .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-R4F: .eabi_attribute 20, 1
+; CORTEX-R4F: .eabi_attribute 21, 1
+; CORTEX-R4F-NOT: .eabi_attribute 22
+; CORTEX-R4F: .eabi_attribute 23, 3
+; CORTEX-R4F: .eabi_attribute 24, 1
+; CORTEX-R4F: .eabi_attribute 25, 1
+; CORTEX-R4F: .eabi_attribute 27, 1
+; CORTEX-R4F-NOT: .eabi_attribute 28
+; CORTEX-R4F-NOT: .eabi_attribute 36
+; CORTEX-R4F: .eabi_attribute 38, 1
+; CORTEX-R4F-NOT: .eabi_attribute 42
+; CORTEX-R4F-NOT: .eabi_attribute 44
+; CORTEX-R4F-NOT: .eabi_attribute 68
+
; CORTEX-R5: .cpu cortex-r5
; CORTEX-R5: .eabi_attribute 6, 10
; CORTEX-R5: .eabi_attribute 7, 82
diff --git a/test/CodeGen/ARM/bx_fold.ll b/test/CodeGen/ARM/bx_fold.ll
index c1aac44..f6651ae 100644
--- a/test/CodeGen/ARM/bx_fold.ll
+++ b/test/CodeGen/ARM/bx_fold.ll
@@ -14,7 +14,7 @@ bb: ; preds = %bb1
bb1: ; preds = %bb, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
%i.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2]
- %tmp = tail call i32 (...)* @bar( ) ; <i32> [#uses=1]
+ %tmp = tail call i32 (...) @bar( ) ; <i32> [#uses=1]
%tmp2 = add i32 %i.0, %tmp ; <i32> [#uses=1]
%Ptr_addr.0 = sub i32 %Ptr, %tmp2 ; <i32> [#uses=0]
%tmp12 = icmp eq i32 %i.0, %Ptr ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/cache-intrinsic.ll b/test/CodeGen/ARM/cache-intrinsic.ll
index a041d075..12b55c7 100644
--- a/test/CodeGen/ARM/cache-intrinsic.ll
+++ b/test/CodeGen/ARM/cache-intrinsic.ll
@@ -10,10 +10,10 @@ define i32 @main() {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
%call1 = call i8* @strcpy(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str1, i32 0, i32 0)) #3
call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds (i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i32 32)) #3
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
ret i32 0
}
diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll
index d4bd92b..f45ed73 100644
--- a/test/CodeGen/ARM/compare-call.ll
+++ b/test/CodeGen/ARM/compare-call.ll
@@ -9,7 +9,7 @@ entry:
br i1 %tmp.upgrd.1, label %cond_true, label %UnifiedReturnBlock
cond_true: ; preds = %entry
- %tmp.upgrd.2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp.upgrd.2 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index cb9520e..c75c630 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -32,7 +32,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!33}
-!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 1, file: !32, enums: !4, retainedTypes: !4, subprograms: !30, imports: null)
+!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 1, file: !32, enums: !{}, retainedTypes: !{}, subprograms: !30, imports: null)
!1 = !MDSubprogram(name: "foo", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !2, scope: !2, type: !3, function: void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, variables: !31)
!2 = !MDFile(filename: "one.c", directory: "/Volumes/Athwagate/R10048772")
!3 = !MDSubroutineType(types: !4)
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index 96876b1..4e499c6 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -95,7 +95,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!162}
-!0 = !MDCompileUnit(language: DW_LANG_ObjC, producer: "Apple clang version 2.1", isOptimized: false, runtimeVersion: 2, emissionKind: 1, file: !153, enums: !147, retainedTypes: !26, subprograms: !148)
+!0 = !MDCompileUnit(language: DW_LANG_ObjC, producer: "Apple clang version 2.1", isOptimized: false, runtimeVersion: 2, emissionKind: 1, file: !153, enums: !147, retainedTypes: !{}, subprograms: !148)
!1 = !MDCompositeType(tag: DW_TAG_enumeration_type, line: 248, size: 32, align: 32, file: !160, scope: !0, elements: !3)
!2 = !MDFile(filename: "header.h", directory: "/Volumes/Sandbox/llvm")
!3 = !{!4}
@@ -158,7 +158,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
!60 = !MDCompositeType(tag: DW_TAG_structure_type, name: "UIMydata", line: 26, size: 128, align: 32, runtimeLang: DW_LANG_ObjC, file: !154, scope: !24, elements: !62)
!61 = !MDFile(filename: "header11.h", directory: "/Volumes/Sandbox/llvm")
!62 = !{!63, !71, !75, !79}
-!63 = !MDDerivedType(tag: DW_TAG_inheritance, file: !60, baseType: !64)
+!63 = !MDDerivedType(tag: DW_TAG_inheritance, file: !61, baseType: !64)
!64 = !MDCompositeType(tag: DW_TAG_structure_type, name: "NSO", line: 66, size: 32, align: 32, runtimeLang: DW_LANG_ObjC, file: !155, scope: !40, elements: !66)
!65 = !MDFile(filename: "NSO.h", directory: "/Volumes/Sandbox/llvm")
!66 = !{!67}
@@ -192,7 +192,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
!94 = !MDCompositeType(tag: DW_TAG_structure_type, name: "twork", line: 43, size: 32, align: 32, runtimeLang: DW_LANG_ObjC, file: !157, scope: !40, elements: !96)
!95 = !MDFile(filename: "header13.h", directory: "/Volumes/Sandbox/llvm")
!96 = !{!97}
-!97 = !MDDerivedType(tag: DW_TAG_inheritance, file: !94, baseType: !64)
+!97 = !MDDerivedType(tag: DW_TAG_inheritance, file: !95, baseType: !64)
!98 = !MDDerivedType(tag: DW_TAG_member, name: "_itemID", line: 38, size: 64, align: 32, offset: 32, flags: DIFlagPrivate, file: !152, scope: !24, baseType: !99, extraData: !"")
!99 = !MDDerivedType(tag: DW_TAG_typedef, name: "uint64_t", line: 55, file: !153, scope: !0, baseType: !100)
!100 = !MDBasicType(tag: DW_TAG_base_type, name: "long long unsigned int", size: 64, align: 32, encoding: DW_ATE_unsigned)
@@ -201,7 +201,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
!103 = !MDCompositeType(tag: DW_TAG_structure_type, name: "MyLibrary2", line: 22, size: 32, align: 32, runtimeLang: DW_LANG_ObjC, file: !158, scope: !40, elements: !105)
!104 = !MDFile(filename: "header14.h", directory: "/Volumes/Sandbox/llvm")
!105 = !{!106}
-!106 = !MDDerivedType(tag: DW_TAG_inheritance, file: !103, baseType: !64)
+!106 = !MDDerivedType(tag: DW_TAG_inheritance, file: !104, baseType: !64)
!107 = !MDDerivedType(tag: DW_TAG_member, name: "_bounds", line: 40, size: 128, align: 32, offset: 128, flags: DIFlagPrivate, file: !152, scope: !24, baseType: !108, extraData: !"")
!108 = !MDDerivedType(tag: DW_TAG_typedef, name: "CR", line: 33, file: !153, scope: !0, baseType: !109)
!109 = !MDCompositeType(tag: DW_TAG_structure_type, name: "CR", line: 29, size: 128, align: 32, file: !156, scope: !0, elements: !110)
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index bd2ae80..cb57efa 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -28,10 +28,10 @@ for.body9: ; preds = %for.body9, %entry
for.end54: ; preds = %for.body9
%tmp115 = extractelement <4 x float> %add19, i32 1
%conv6.i75 = fpext float %tmp115 to double, !dbg !45
- %call.i82 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45
+ %call.i82 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45
%tmp116 = extractelement <4 x float> %add20, i32 1
%conv6.i76 = fpext float %tmp116 to double, !dbg !45
- %call.i83 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i76, double undef, double undef) nounwind, !dbg !45
+ %call.i83 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i76, double undef, double undef) nounwind, !dbg !45
ret i32 0, !dbg !49
}
@@ -44,7 +44,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!0 = !MDSubprogram(name: "test0001", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !54, scope: null, type: !3, function: <4 x float> (float)* @test0001, variables: !51)
!1 = !MDFile(filename: "build2.c", directory: "/private/tmp")
-!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !54, enums: !17, retainedTypes: !17, subprograms: !50, imports: null)
+!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !54, enums: !{}, retainedTypes: !{}, subprograms: !50, imports: null)
!3 = !MDSubroutineType(types: !4)
!4 = !{!5}
!5 = !MDDerivedType(tag: DW_TAG_typedef, name: "v4f32", line: 14, file: !54, scope: !2, baseType: !6)
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index 0667e0f..034d0f4 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -16,7 +16,7 @@ entry:
tail call void @llvm.dbg.value(metadata double %val, i64 0, metadata !20, metadata !MDExpression()), !dbg !26
tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !21, metadata !MDExpression()), !dbg !26
%0 = zext i8 %c to i32, !dbg !27
- %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !27
+ %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !27
ret i32 0, !dbg !29
}
@@ -26,7 +26,7 @@ entry:
tail call void @llvm.dbg.value(metadata double %val, i64 0, metadata !17, metadata !MDExpression()), !dbg !30
tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !18, metadata !MDExpression()), !dbg !30
%0 = zext i8 %c to i32, !dbg !31
- %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !31
+ %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !31
ret i32 0, !dbg !33
}
@@ -45,11 +45,11 @@ entry:
%3 = getelementptr inbounds i8, i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !37
%4 = trunc i32 %argc to i8, !dbg !37
%5 = add i8 %4, 97, !dbg !37
- tail call void @llvm.dbg.value(metadata i8* %3, i64 0, metadata !19, metadata !MDExpression()) nounwind, !dbg !38
- tail call void @llvm.dbg.value(metadata double %1, i64 0, metadata !20, metadata !MDExpression()) nounwind, !dbg !38
- tail call void @llvm.dbg.value(metadata i8 %5, i64 0, metadata !21, metadata !MDExpression()) nounwind, !dbg !38
+ tail call void @llvm.dbg.value(metadata i8* %3, i64 0, metadata !49, metadata !MDExpression()) nounwind, !dbg !38
+ tail call void @llvm.dbg.value(metadata double %1, i64 0, metadata !50, metadata !MDExpression()) nounwind, !dbg !38
+ tail call void @llvm.dbg.value(metadata i8 %5, i64 0, metadata !51, metadata !MDExpression()) nounwind, !dbg !38
%6 = zext i8 %5 to i32, !dbg !39
- %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %3, double %1, i32 %6) nounwind, !dbg !39
+ %7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %3, double %1, i32 %6) nounwind, !dbg !39
%8 = tail call i32 @printer(i8* %3, double %1, i8 zeroext %5) nounwind, !dbg !40
ret i32 0, !dbg !41
}
@@ -75,12 +75,17 @@ declare i32 @puts(i8* nocapture) nounwind
!13 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !46, scope: !1, baseType: !14)
!14 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !46, scope: !1, baseType: !15)
!15 = !MDBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!16 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 11, arg: 0, scope: !0, file: !1, type: !6)
-!17 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 11, arg: 0, scope: !0, file: !1, type: !7)
-!18 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 11, arg: 0, scope: !0, file: !1, type: !8)
-!19 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 4, arg: 0, scope: !9, file: !1, type: !6)
-!20 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 4, arg: 0, scope: !9, file: !1, type: !7)
-!21 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 0, scope: !9, file: !1, type: !8)
+!16 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 11, arg: 1, scope: !0, file: !1, type: !6)
+!17 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 11, arg: 2, scope: !0, file: !1, type: !7)
+!18 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 11, arg: 3, scope: !0, file: !1, type: !8)
+!19 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 4, arg: 1, scope: !9, file: !1, type: !6)
+!20 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 4, arg: 2, scope: !9, file: !1, type: !7)
+!21 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 3, scope: !9, file: !1, type: !8)
+
+!49 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 4, arg: 1, scope: !9, file: !1, type: !6)
+!50 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 4, arg: 2, scope: !9, file: !1, type: !7)
+!51 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 2, scope: !9, file: !1, type: !8)
+
!22 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 17, arg: 0, scope: !10, file: !1, type: !5)
!23 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 17, arg: 0, scope: !10, file: !1, type: !13)
!24 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "dval", line: 19, scope: !25, file: !1, type: !7)
@@ -106,5 +111,5 @@ declare i32 @puts(i8* nocapture) nounwind
!44 = !{!19, !20, !21}
!45 = !{!22, !23, !24}
!46 = !MDFile(filename: "a.c", directory: "/tmp/")
-!47 = !{i32 0}
+!47 = !{}
!48 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/debug-info-no-frame.ll b/test/CodeGen/ARM/debug-info-no-frame.ll
new file mode 100644
index 0000000..418a074
--- /dev/null
+++ b/test/CodeGen/ARM/debug-info-no-frame.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=armv7-none-linux-gnueabihf < %s -o - | FileCheck %s
+
+; Function Attrs: nounwind
+define void @need_cfi_def_cfa_offset() #0 {
+; CHECK-LABEL: need_cfi_def_cfa_offset:
+; CHECK: sub sp, sp, #4
+; CHECK: .cfi_def_cfa_offset 4
+entry:
+ %Depth = alloca i32, align 4
+ call void @llvm.dbg.declare(metadata i32* %Depth, metadata !9, metadata !10), !dbg !11
+ store i32 2, i32* %Depth, align 4, !dbg !11
+ ret void, !dbg !12
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+
+!0 = !MDCompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false)
+!1 = !MDFile(filename: "file.c", directory: "/dir")
+!2 = !{}
+!3 = !MDSubprogram(name: "need_cfi_def_cfa_offset", scope: !1, file: !1, line: 1, type: !4, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, function: void ()* @need_cfi_def_cfa_offset, variables: !2)
+!4 = !MDSubroutineType(types: !5)
+!5 = !{null}
+!6 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "Depth", scope: !3, file: !1, line: 3, type: !6)
+!10 = !MDExpression()
+!11 = !MDLocation(line: 3, column: 9, scope: !3)
+!12 = !MDLocation(line: 7, column: 5, scope: !3)
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index 398f652..9cfd67d 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -27,7 +27,7 @@ for.end54: ; preds = %for.body9
tail call void @llvm.dbg.value(metadata <4 x float> %add19, i64 0, metadata !27, metadata !MDExpression()), !dbg !39
%tmp115 = extractelement <4 x float> %add19, i32 1
%conv6.i75 = fpext float %tmp115 to double, !dbg !45
- %call.i82 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45
+ %call.i82 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45
ret i32 0, !dbg !49
}
@@ -40,11 +40,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!0 = !MDSubprogram(name: "test0001", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !54, scope: !1, type: !3, function: <4 x float> (float)* @test0001, variables: !51)
!1 = !MDFile(filename: "build2.c", directory: "/private/tmp")
-!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !54, enums: !17, retainedTypes: !17, subprograms: !50, imports: null)
+!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !54, enums: !{}, retainedTypes: !{}, subprograms: !50, imports: null)
!3 = !MDSubroutineType(types: !4)
!4 = !{!5}
!5 = !MDDerivedType(tag: DW_TAG_typedef, name: "v4f32", line: 14, file: !54, scope: !2, baseType: !6)
-!6 = !MDCompositeType(tag: DW_TAG_array_type, size: 128, align: 128, file: !2, baseType: !7, elements: !8)
+!6 = !MDCompositeType(tag: DW_TAG_array_type, size: 128, align: 128, file: !1, baseType: !7, elements: !8)
!7 = !MDBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!8 = !{!9}
!9 = !MDSubrange(count: 4)
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index d08ec03..3cd2837 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -19,7 +19,7 @@ entry:
tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !12, metadata !MDExpression()), !dbg !26
%conv = fpext float %val to double, !dbg !27
%conv3 = zext i8 %c to i32, !dbg !27
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27
ret i32 0, !dbg !29
}
@@ -32,7 +32,7 @@ entry:
tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !16, metadata !MDExpression()), !dbg !32
%conv = fpext float %val to double, !dbg !33
%conv3 = zext i8 %c to i32, !dbg !33
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33
ret i32 0, !dbg !35
}
@@ -48,12 +48,12 @@ entry:
%add.ptr = getelementptr i8, i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !40
%add5 = add nsw i32 %argc, 97, !dbg !40
%conv6 = trunc i32 %add5 to i8, !dbg !40
- tail call void @llvm.dbg.value(metadata i8* %add.ptr, i64 0, metadata !8, metadata !MDExpression()) nounwind, !dbg !41
- tail call void @llvm.dbg.value(metadata float %conv1, i64 0, metadata !10, metadata !MDExpression()) nounwind, !dbg !42
- tail call void @llvm.dbg.value(metadata i8 %conv6, i64 0, metadata !12, metadata !MDExpression()) nounwind, !dbg !43
+ tail call void @llvm.dbg.value(metadata i8* %add.ptr, i64 0, metadata !58, metadata !MDExpression()) nounwind, !dbg !41
+ tail call void @llvm.dbg.value(metadata float %conv1, i64 0, metadata !60, metadata !MDExpression()) nounwind, !dbg !42
+ tail call void @llvm.dbg.value(metadata i8 %conv6, i64 0, metadata !62, metadata !MDExpression()) nounwind, !dbg !43
%conv.i = fpext float %conv1 to double, !dbg !44
%conv3.i = and i32 %add5, 255, !dbg !44
- %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, !dbg !44
+ %call.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, !dbg !44
%call14 = tail call i32 @printer(i8* %add.ptr, float %conv1, i8 zeroext %conv6) optsize, !dbg !45
ret i32 0, !dbg !46
}
@@ -79,6 +79,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!11 = !MDBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!12 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 3, scope: !0, file: !1, type: !13)
!13 = !MDBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
+
+!58 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 4, arg: 1, scope: !0, file: !1, type: !9)
+!60 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 4, arg: 2, scope: !0, file: !1, type: !11)
+!62 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 3, scope: !0, file: !1, type: !13)
+
!14 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 11, arg: 1, scope: !6, file: !1, type: !9)
!15 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 11, arg: 2, scope: !6, file: !1, type: !11)
!16 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 11, arg: 3, scope: !6, file: !1, type: !13)
@@ -117,5 +122,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!49 = !{!14, !15, !16}
!50 = !{!17, !18, !22}
!51 = !MDFile(filename: "a.c", directory: "/private/tmp")
-!52 = !{i32 0}
+!52 = !{}
!53 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index df578fd..e5f7a27 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -62,5 +62,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!16 = !{!1}
!17 = !{!5, !8}
!18 = !MDFile(filename: "k.cc", directory: "/private/tmp")
-!19 = !{i32 0}
+!19 = !{}
!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index a339c81..7b298fe 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -1,11 +1,13 @@
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-ARM
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift | FileCheck %s -check-prefix=CHECK-HWDIV
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r5 | FileCheck %s -check-prefix=CHECK-HWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-SWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift | FileCheck %s -check-prefix=CHECK-HWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r4 | FileCheck %s -check-prefix=CHECK-SWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r4f | FileCheck %s -check-prefix=CHECK-SWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r5 | FileCheck %s -check-prefix=CHECK-HWDIV
define i32 @f1(i32 %a, i32 %b) {
entry:
-; CHECK-ARM: f1
-; CHECK-ARM: __divsi3
+; CHECK-SWDIV: f1
+; CHECK-SWDIV: __divsi3
; CHECK-HWDIV: f1
; CHECK-HWDIV: sdiv
@@ -15,8 +17,8 @@ entry:
define i32 @f2(i32 %a, i32 %b) {
entry:
-; CHECK-ARM: f2
-; CHECK-ARM: __udivsi3
+; CHECK-SWDIV: f2
+; CHECK-SWDIV: __udivsi3
; CHECK-HWDIV: f2
; CHECK-HWDIV: udiv
@@ -26,8 +28,8 @@ entry:
define i32 @f3(i32 %a, i32 %b) {
entry:
-; CHECK-ARM: f3
-; CHECK-ARM: __modsi3
+; CHECK-SWDIV: f3
+; CHECK-SWDIV: __modsi3
; CHECK-HWDIV: f3
; CHECK-HWDIV: sdiv
@@ -38,8 +40,8 @@ entry:
define i32 @f4(i32 %a, i32 %b) {
entry:
-; CHECK-ARM: f4
-; CHECK-ARM: __umodsi3
+; CHECK-SWDIV: f4
+; CHECK-SWDIV: __umodsi3
; CHECK-HWDIV: f4
; CHECK-HWDIV: udiv
diff --git a/test/CodeGen/ARM/fast-isel-vararg.ll b/test/CodeGen/ARM/fast-isel-vararg.ll
index aa37e7d..35442ee 100644
--- a/test/CodeGen/ARM/fast-isel-vararg.ll
+++ b/test/CodeGen/ARM/fast-isel-vararg.ll
@@ -37,7 +37,7 @@ entry:
; THUMB: str.w {{[a-z0-9]+}}, [sp]
; THUMB: str.w {{[a-z0-9]+}}, [sp, #4]
; THUMB: bl {{_?}}CallVariadic
- %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
+ %call = call i32 (i32, ...) @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
store i32 %call, i32* %tmp, align 4
%5 = load i32, i32* %tmp, align 4
ret i32 %5
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index 1de0572..d013fbf 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -48,7 +48,7 @@ entry:
; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
; SOFT: vshr.u64 [[REG7]], [[REG7]], #32
; SOFT: vbsl [[REG6]], [[REG7]],
- %0 = tail call double (...)* @bar() nounwind
+ %0 = tail call double (...) @bar() nounwind
%1 = fptrunc double %0 to float
%2 = tail call float @copysignf(float 5.000000e-01, float %1) nounwind readnone
%3 = fadd float %1, %2
diff --git a/test/CodeGen/ARM/ghc-tcreturn-lowered.ll b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
index 9731b3d..f34f8f1 100644
--- a/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
+++ b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
@@ -16,6 +16,6 @@ define ghccc void @test_indirect_tail() {
; CHECK-LABEL: test_indirect_tail:
; CHECK: bx {{r[0-9]+}}
%func = load void()*, void()** @ind_func
- tail call ghccc void()* %func()
+ tail call ghccc void() %func()
ret void
}
diff --git a/test/CodeGen/ARM/global-merge-1.ll b/test/CodeGen/ARM/global-merge-1.ll
index 03a9d33..d4d9b0f 100644
--- a/test/CodeGen/ARM/global-merge-1.ll
+++ b/test/CodeGen/ARM/global-merge-1.ll
@@ -1,10 +1,12 @@
; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O0 -o - -arm-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O0 -o - -arm-global-merge=true | FileCheck -check-prefix=MERGE %s
; RUN: llc %s -O1 -o - | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O1 -o - -arm-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O1 -o - -arm-global-merge=true | FileCheck -check-prefix=MERGE %s
; RUN: llc %s -O3 -o - | FileCheck -check-prefix=MERGE %s
-; RUN: llc %s -O3 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O3 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s
+; RUN: llc %s -O3 -o - -arm-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O3 -o - -arm-global-merge=true | FileCheck -check-prefix=MERGE %s
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index a00deda..7890193 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -10,7 +10,7 @@ entry:
br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
cond_true: ; preds = %entry
- %tmp10 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp10 = call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
diff --git a/test/CodeGen/ARM/indirectbr-2.ll b/test/CodeGen/ARM/indirectbr-2.ll
index 044fb56..ca068db 100644
--- a/test/CodeGen/ARM/indirectbr-2.ll
+++ b/test/CodeGen/ARM/indirectbr-2.ll
@@ -27,7 +27,7 @@ define i32 @func() nounwind ssp {
%9 = load i32, i32* %8
%10 = add i32 %9, ptrtoint (i8* blockaddress(@func, %4) to i32)
%11 = inttoptr i32 %10 to i8*
- %12 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([45 x i8], [45 x i8]* @0, i32 0, i32 0))
+ %12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([45 x i8], [45 x i8]* @0, i32 0, i32 0))
indirectbr i8* %11, [label %13, label %14]
; <label>:13 ; preds = %4
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 78d2228..4ea26e1 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -30,7 +30,7 @@ entry:
define void @t1(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t1:
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
; CHECK: adds r0, #15
; CHECK: adds r1, #15
@@ -48,7 +48,7 @@ entry:
; CHECK: str [[REG2]], [r0, #32]
; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
ret void
@@ -59,7 +59,7 @@ entry:
; CHECK-LABEL: t3:
; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
-; CHECK: vld1.8 {d{{[0-9]+}}}, [r1]
+; CHECK: vldr d{{[0-9]+}}, [r1]
; CHECK: vst1.8 {d{{[0-9]+}}}, [r0]
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
ret void
@@ -68,7 +68,7 @@ entry:
define void @t4(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t4:
-; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1]
+; CHECK: vld1.64 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1]
; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]!
; CHECK: strh [[REG5:r[0-9]+]], [r0]
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
@@ -97,11 +97,11 @@ entry:
define void @t6() nounwind {
entry:
; CHECK-LABEL: t6:
-; CHECK: vld1.8 {[[REG9:d[0-9]+]]}, [r0]
+; CHECK: vldr [[REG9:d[0-9]+]], [r0]
; CHECK: vstr [[REG9]], [r1]
; CHECK: adds r1, #6
; CHECK: adds r0, #6
-; CHECK: vld1.8
+; CHECK: vld1.16
; CHECK: vst1.16
; CHECK-T1-LABEL: t6:
; CHECK-T1: movs [[TREG5:r[0-9]]],
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index 160096a..c214336 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -3,22 +3,19 @@
; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
-@from = common global [500 x i32] zeroinitializer, align 4
-@to = common global [500 x i32] zeroinitializer, align 4
-
-define void @f1() {
+define void @f1(i8* %dest, i8* %src) {
entry:
; CHECK-LABEL: f1
; CHECK-IOS: memmove
; CHECK-DARWIN: memmove
; CHECK-EABI: __aeabi_memmove
- call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
; CHECK-IOS: memcpy
; CHECK-DARWIN: memcpy
; CHECK-EABI: __aeabi_memcpy
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
; EABI memset swaps arguments
; CHECK-IOS: mov r1, #0
@@ -27,7 +24,7 @@ entry:
; CHECK-DARWIN: memset
; CHECK-EABI: mov r2, #0
; CHECK-EABI: __aeabi_memset
- call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 0, i1 false)
unreachable
}
@@ -281,6 +278,47 @@ entry:
unreachable
}
+; Check that global variables are aligned if they are large enough, but only if
+; they are defined in this object and don't have an explicit section.
+@arr1 = global [7 x i8] c"\01\02\03\04\05\06\07", align 1
+@arr2 = global [8 x i8] c"\01\02\03\04\05\06\07\08", align 1
+@arr3 = global [7 x i8] c"\01\02\03\04\05\06\07", section "foo,bar", align 1
+@arr4 = global [8 x i8] c"\01\02\03\04\05\06\07\08", section "foo,bar", align 1
+@arr5 = weak global [7 x i8] c"\01\02\03\04\05\06\07", align 1
+@arr6 = weak_odr global [7 x i8] c"\01\02\03\04\05\06\07", align 1
+@arr7 = external global [7 x i8], align 1
+define void @f9(i8* %dest, i32 %n) {
+entry:
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr3, i32 0, i32 0), i32 %n, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr4, i32 0, i32 0), i32 %n, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr5, i32 0, i32 0), i32 %n, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr6, i32 0, i32 0), i32 %n, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr7, i32 0, i32 0), i32 %n, i32 1, i1 false)
+
+ unreachable
+}
+
+; CHECK: {{\.data|\.section.+data}}
+; CHECK-NOT: .align
+; CHECK: arr1:
+; CHECK-IOS: .align 3
+; CHECK-DARWIN: .align 2
+; CHECK-EABI: .align 2
+; CHECK: arr2:
+; CHECK: {{\.section.+foo,bar}}
+; CHECK-NOT: .align
+; CHECK: arr3:
+; CHECK-NOT: .align
+; CHECK: arr4:
+; CHECK: {{\.data|\.section.+data}}
+; CHECK-NOT: .align
+; CHECK: arr5:
+; CHECK-NOT: .align
+; CHECK: arr6:
+; CHECK-NOT: arr7:
+
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/neon-spfp.ll b/test/CodeGen/ARM/neon-spfp.ll
index 8f0f3a8..4eeaa8a 100644
--- a/test/CodeGen/ARM/neon-spfp.ll
+++ b/test/CodeGen/ARM/neon-spfp.ll
@@ -64,7 +64,7 @@ for.body: ; preds = %for.body, %entry
; CHECK-DARWINA15: vmul.f32 s{{[0-9]*}}
; CHECK-DARWINSWIFT: vmul.f32 d{{[0-9]*}}
%conv = fpext float %mul to double
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), double %conv) #1
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), double %conv) #1
%inc = add nsw i32 %i.04, 1
%exitcond = icmp eq i32 %inc, 16000
br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/CodeGen/ARM/optselect-regclass.ll b/test/CodeGen/ARM/optselect-regclass.ll
index f921127..4c5d44c 100644
--- a/test/CodeGen/ARM/optselect-regclass.ll
+++ b/test/CodeGen/ARM/optselect-regclass.ll
@@ -17,7 +17,7 @@ entry:
%or = or i32 %cond13, %bf.clear10
%shl = shl nuw i32 %or, 2
%add = add i32 0, %shl
- tail call void (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @operands, i32 0, i32 0), i32 0, i32 50, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str86, i32 0, i32 0), i32 undef, i32 undef, i32 %add)
+ tail call void (i8*, i32, i32, i8*, ...) @__sprintf_chk(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @operands, i32 0, i32 0), i32 0, i32 50, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str86, i32 0, i32 0), i32 undef, i32 undef, i32 %add)
ret void
}
diff --git a/test/CodeGen/ARM/print-memb-operand.ll b/test/CodeGen/ARM/print-memb-operand.ll
new file mode 100644
index 0000000..7748efb
--- /dev/null
+++ b/test/CodeGen/ARM/print-memb-operand.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=armv7 %s -o - | FileCheck %s
+
+; CHECK: dmb ld
+
+define void @test2() #0 {
+ call void @llvm.arm.dmb(i32 13)
+ ret void
+}
+
+declare void @llvm.arm.dmb(i32)
+
+attributes #0 = { "target-cpu"="cyclone" }
diff --git a/test/CodeGen/ARM/regpair_hint_phys.ll b/test/CodeGen/ARM/regpair_hint_phys.ll
new file mode 100644
index 0000000..8585a4c
--- /dev/null
+++ b/test/CodeGen/ARM/regpair_hint_phys.ll
@@ -0,0 +1,22 @@
+; RUN: llc -o - %s
+; ARM target used to fail an assertion if RegPair{Odd|Even} hint pointed to a
+; physreg.
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7-apple-tvos8.3.0"
+
+declare i8* @llvm.frameaddress(i32) #1
+declare i8* @llvm.returnaddress(i32) #1
+
+@somevar = global [2 x i32] [i32 0, i32 0]
+
+define void @__ubsan_handle_shift_out_of_bounds() #0 {
+entry:
+ %0 = tail call i8* @llvm.frameaddress(i32 0)
+ %1 = ptrtoint i8* %0 to i32
+ %2 = tail call i8* @llvm.returnaddress(i32 0)
+ %3 = ptrtoint i8* %2 to i32
+ %val0 = insertvalue [2 x i32] [i32 undef, i32 undef], i32 %3, 0
+ %val1 = insertvalue [2 x i32] %val0, i32 %1, 1
+ store [2 x i32] %val1, [2 x i32]* @somevar, align 8
+ ret void
+}
diff --git a/test/CodeGen/ARM/saxpy10-a9.ll b/test/CodeGen/ARM/saxpy10-a9.ll
index af7b7ad..91610f1 100644
--- a/test/CodeGen/ARM/saxpy10-a9.ll
+++ b/test/CodeGen/ARM/saxpy10-a9.ll
@@ -14,15 +14,12 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
; CHECK: vldr
; CHECK: vldr
; CHECK: vldr
-; CHECK: vldr
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vldr
-; CHECK-NEXT: vldr
-; CHECK-NEXT: vadd
-; CHECK-NEXT: vmul
-; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vmul
@@ -31,6 +28,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
+; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
@@ -48,6 +46,8 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 3999168..6f5c0e8 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -64,7 +64,7 @@ entry:
; A9-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
; A9: str [[REG]], [r0, r1, lsl #2]
; A9-NOT: str [[REG]], [r0]
- %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
+ %0 = tail call i8* (...) @malloc(i32 undef) nounwind
%1 = bitcast i8* %0 to i32*
%2 = sext i16 %addr to i32
%3 = getelementptr inbounds i32, i32* %1, i32 %2
diff --git a/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll b/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll
index 15f8ec2..2a7a82d 100644
--- a/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll
+++ b/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll
@@ -16,7 +16,7 @@ entry:
%title = alloca [15 x i8], align 1
%0 = getelementptr inbounds [15 x i8], [15 x i8]* %title, i32 0, i32 0
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @main.title, i32 0, i32 0), i32 15, i32 1, i1 false)
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8* %0) #3
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8* %0) #3
ret i32 0
}
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
index 31c6ecd..88207e6 100644
--- a/test/CodeGen/ARM/stm.ll
+++ b/test/CodeGen/ARM/stm.ll
@@ -10,7 +10,7 @@ entry:
; CHECK: main
; CHECK: push
; CHECK: stm
- %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([26 x i8], [26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind ; <i32> [#uses=0]
- %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([32 x i8], [32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind ; <i32> [#uses=0]
+ %0 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([26 x i8], [26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind ; <i32> [#uses=0]
+ %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([32 x i8], [32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/ARM/uint64tof64.ll b/test/CodeGen/ARM/uint64tof64.ll
index f77603e..cd35ce7 100644
--- a/test/CodeGen/ARM/uint64tof64.ll
+++ b/test/CodeGen/ARM/uint64tof64.ll
@@ -10,7 +10,7 @@ entry:
%0 = load i64, i64* null, align 4 ; <i64> [#uses=1]
%1 = uitofp i64 %0 to double ; <double> [#uses=1]
%2 = fdiv double 0.000000e+00, %1 ; <double> [#uses=1]
- %3 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* null, i8* getelementptr ([54 x i8], [54 x i8]* @"\01LC10", i32 0, i32 0), i64 0, double %2) ; <i32> [#uses=0]
+ %3 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* null, i8* getelementptr ([54 x i8], [54 x i8]* @"\01LC10", i32 0, i32 0), i64 0, double %2) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/ARM/vargs.ll b/test/CodeGen/ARM/vargs.ll
index 78d8448..41ec038 100644
--- a/test/CodeGen/ARM/vargs.ll
+++ b/test/CodeGen/ARM/vargs.ll
@@ -4,8 +4,8 @@
define i32 @main() {
entry:
- %tmp = call i32 (i8*, ...)* @printf( i8* getelementptr ([43 x i8], [43 x i8]* @str, i32 0, i64 0), i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ) ; <i32> [#uses=0]
- %tmp2 = call i32 (i8*, ...)* @printf( i8* getelementptr ([43 x i8], [43 x i8]* @str, i32 0, i64 0), i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1 ) ; <i32> [#uses=0]
+ %tmp = call i32 (i8*, ...) @printf( i8* getelementptr ([43 x i8], [43 x i8]* @str, i32 0, i64 0), i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ) ; <i32> [#uses=0]
+ %tmp2 = call i32 (i8*, ...) @printf( i8* getelementptr ([43 x i8], [43 x i8]* @str, i32 0, i64 0), i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1 ) ; <i32> [#uses=0]
ret i32 11
}
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index 0b7ffb8..78105f7 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -180,8 +180,8 @@ define <2 x i64> @fix_float_to_i64(<2 x float> %in) {
define <4 x i16> @fix_double_to_i16(<4 x double> %in) {
; CHECK-LABEL: fix_double_to_i16:
-; CHECK: vcvt.s32.f64
-; CHECK: vcvt.s32.f64
+; CHECK: vcvt.u32.f64
+; CHECK: vcvt.u32.f64
%scale = fmul <4 x double> %in, <double 2.0, double 2.0, double 2.0, double 2.0>
%conv = fptoui <4 x double> %scale to <4 x i16>
diff --git a/test/CodeGen/ARM/vector-spilling.ll b/test/CodeGen/ARM/vector-spilling.ll
index b8058c8..9e3225e 100644
--- a/test/CodeGen/ARM/vector-spilling.ll
+++ b/test/CodeGen/ARM/vector-spilling.ll
@@ -25,7 +25,7 @@ entry:
%8 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
%9 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
- tail call void(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>)* @foo(<8 x i64> %1, <8 x i64> %3, <8 x i64> %5, <8 x i64> %7, <8 x i64> %8, <8 x i64> %9)
+ tail call void(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) @foo(<8 x i64> %1, <8 x i64> %3, <8 x i64> %5, <8 x i64> %7, <8 x i64> %8, <8 x i64> %9)
ret void
}
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 31b55e8..03c0354 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -124,11 +124,11 @@ entry:
br i1 %tmp6, label %cond_true, label %cond_false
cond_true: ; preds = %entry
- %tmp.upgrd.2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp.upgrd.2 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
cond_false: ; preds = %entry
- %tmp7 = tail call i32 (...)* @baz( ) ; <i32> [#uses=0]
+ %tmp7 = tail call i32 (...) @baz( ) ; <i32> [#uses=0]
ret void
}
@@ -147,10 +147,10 @@ entry:
br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
cond_true: ; preds = %entry
- %tmp.upgrd.4 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp.upgrd.4 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
cond_false: ; preds = %entry
- %tmp1 = tail call i32 (...)* @baz( ) ; <i32> [#uses=0]
+ %tmp1 = tail call i32 (...) @baz( ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/ARM/vminmaxnm.ll b/test/CodeGen/ARM/vminmaxnm.ll
index a183284..2e2648d 100644
--- a/test/CodeGen/ARM/vminmaxnm.ll
+++ b/test/CodeGen/ARM/vminmaxnm.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -mtriple armv8 -mattr=+neon | FileCheck %s
-; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST
+; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 | FileCheck %s
+; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 \
+; RUN: -enable-no-nans-fp-math -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST
+
+; vectors
define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vmaxnmq:
@@ -37,6 +40,8 @@ define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
ret <2 x float> %tmp3
}
+; scalars
+
define float @fp-armv8_vminnm_o(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_o":
; CHECK-FAST-NOT: vcmp
@@ -48,6 +53,17 @@ define float @fp-armv8_vminnm_o(float %a, float %b) {
ret float %cond
}
+define double @fp-armv8_vminnm_ole(double %a, double %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_ole":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f64
+; CHECK-LABEL: "fp-armv8_vminnm_ole":
+; CHECK-NOT: vminnm.f64
+ %cmp = fcmp ole double %a, %b
+ %cond = select i1 %cmp, double %a, double %b
+ ret double %cond
+}
+
define float @fp-armv8_vminnm_o_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_o_rev":
; CHECK-FAST-NOT: vcmp
@@ -59,6 +75,17 @@ define float @fp-armv8_vminnm_o_rev(float %a, float %b) {
ret float %cond
}
+define double @fp-armv8_vminnm_oge_rev(double %a, double %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_oge_rev":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f64
+; CHECK-LABEL: "fp-armv8_vminnm_oge_rev":
+; CHECK-NOT: vminnm.f64
+ %cmp = fcmp oge double %a, %b
+ %cond = select i1 %cmp, double %b, double %a
+ ret double %cond
+}
+
define float @fp-armv8_vminnm_u(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_u":
; CHECK-FAST-NOT: vcmp
@@ -70,6 +97,17 @@ define float @fp-armv8_vminnm_u(float %a, float %b) {
ret float %cond
}
+define float @fp-armv8_vminnm_ule(float %a, float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_ule":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_ule":
+; CHECK-NOT: vminnm.f32
+ %cmp = fcmp ule float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
define float @fp-armv8_vminnm_u_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_u_rev":
; CHECK-FAST-NOT: vcmp
@@ -81,6 +119,17 @@ define float @fp-armv8_vminnm_u_rev(float %a, float %b) {
ret float %cond
}
+define double @fp-armv8_vminnm_uge_rev(double %a, double %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_uge_rev":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f64
+; CHECK-LABEL: "fp-armv8_vminnm_uge_rev":
+; CHECK-NOT: vminnm.f64
+ %cmp = fcmp uge double %a, %b
+ %cond = select i1 %cmp, double %b, double %a
+ ret double %cond
+}
+
define float @fp-armv8_vmaxnm_o(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_o":
; CHECK-FAST-NOT: vcmp
@@ -92,6 +141,17 @@ define float @fp-armv8_vmaxnm_o(float %a, float %b) {
ret float %cond
}
+define float @fp-armv8_vmaxnm_oge(float %a, float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_oge":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_oge":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp oge float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_o_rev":
; CHECK-FAST-NOT: vcmp
@@ -103,6 +163,17 @@ define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) {
ret float %cond
}
+define float @fp-armv8_vmaxnm_ole_rev(float %a, float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_ole_rev":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_ole_rev":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp ole float %a, %b
+ %cond = select i1 %cmp, float %b, float %a
+ ret float %cond
+}
+
define float @fp-armv8_vmaxnm_u(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_u":
; CHECK-FAST-NOT: vcmp
@@ -114,6 +185,17 @@ define float @fp-armv8_vmaxnm_u(float %a, float %b) {
ret float %cond
}
+define float @fp-armv8_vmaxnm_uge(float %a, float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_uge":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_uge":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp uge float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_u_rev":
; CHECK-FAST-NOT: vcmp
@@ -125,6 +207,17 @@ define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) {
ret float %cond
}
+define double @fp-armv8_vmaxnm_ule_rev(double %a, double %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_ule_rev":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vmaxnm.f64
+; CHECK-LABEL: "fp-armv8_vmaxnm_ule_rev":
+; CHECK-NOT: vmaxnm.f64
+ %cmp = fcmp ule double %a, %b
+ %cond = select i1 %cmp, double %b, double %a
+ ret double %cond
+}
+
declare <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/weak2.ll b/test/CodeGen/ARM/weak2.ll
index 82ab90e..a2911d7 100644
--- a/test/CodeGen/ARM/weak2.ll
+++ b/test/CodeGen/ARM/weak2.ll
@@ -8,7 +8,7 @@ entry:
br i1 %tmp5, label %UnifiedReturnBlock, label %cond_true8
cond_true8: ; preds = %entry
- %tmp10 = tail call i32 (...)* %t.0( ) ; <i32> [#uses=1]
+ %tmp10 = tail call i32 (...) %t.0( ) ; <i32> [#uses=1]
ret i32 %tmp10
UnifiedReturnBlock: ; preds = %entry
diff --git a/test/CodeGen/BPF/basictest.ll b/test/CodeGen/BPF/basictest.ll
index c0b6af4..2a2d498 100644
--- a/test/CodeGen/BPF/basictest.ll
+++ b/test/CodeGen/BPF/basictest.ll
@@ -8,8 +8,8 @@ define i32 @test0(i32 %X) {
}
; CHECK-LABEL: store_imm:
-; CHECK: stw 0(r1), r0
-; CHECK: stw 4(r2), r0
+; CHECK: stw 0(r1), r{{[03]}}
+; CHECK: stw 4(r2), r{{[03]}}
define i32 @store_imm(i32* %a, i32* %b) {
entry:
store i32 0, i32* %a, align 4
diff --git a/test/CodeGen/BPF/ex1.ll b/test/CodeGen/BPF/ex1.ll
index 366bc17..be038e9 100644
--- a/test/CodeGen/BPF/ex1.ll
+++ b/test/CodeGen/BPF/ex1.ll
@@ -26,7 +26,7 @@ define i32 @bpf_prog1(%struct.bpf_context* nocapture %ctx) #0 section "events/ne
; <label>:10 ; preds = %0
%11 = getelementptr inbounds [15 x i8], [15 x i8]* %fmt, i64 0, i64 0
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %11, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @bpf_prog1.fmt, i64 0, i64 0), i64 15, i32 1, i1 false)
- %12 = call i32 (i8*, i32, ...)* inttoptr (i64 11 to i32 (i8*, i32, ...)*)(i8* %11, i32 15, %struct.sk_buff* %4, i8* %7) #1
+ %12 = call i32 (i8*, i32, ...) inttoptr (i64 11 to i32 (i8*, i32, ...)*)(i8* %11, i32 15, %struct.sk_buff* %4, i8* %7) #1
; CHECK-LABEL: bpf_prog1:
; CHECK: call 4
; CHECK: call 9
diff --git a/test/CodeGen/BPF/intrinsics.ll b/test/CodeGen/BPF/intrinsics.ll
index e0f050e..98b57de 100644
--- a/test/CodeGen/BPF/intrinsics.ll
+++ b/test/CodeGen/BPF/intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=bpf | FileCheck %s
+; RUN: llc < %s -march=bpf -show-mc-encoding | FileCheck %s
; Function Attrs: nounwind uwtable
define i32 @ld_b(i64 %foo, i64* nocapture %bar, i8* %ctx, i8* %ctx2) #0 {
@@ -48,3 +48,41 @@ define i32 @ld_w(i8* %ctx, i8* %ctx2, i32 %foo) #0 {
}
declare i64 @llvm.bpf.load.word(i8*, i64) #1
+
+define i32 @ld_pseudo() #0 {
+entry:
+ %call = tail call i64 @llvm.bpf.pseudo(i64 2, i64 3)
+ tail call void @bar(i64 %call, i32 4) #2
+ ret i32 0
+; CHECK-LABEL: ld_pseudo:
+; CHECK: ld_pseudo r1, 2, 3 # encoding: [0x18,0x21,0x00,0x00,0x03,0x00
+}
+
+declare void @bar(i64, i32) #1
+
+declare i64 @llvm.bpf.pseudo(i64, i64) #2
+
+define i32 @bswap(i64 %a, i64 %b, i64 %c) #0 {
+entry:
+ %0 = tail call i64 @llvm.bswap.i64(i64 %a)
+ %conv = trunc i64 %b to i32
+ %1 = tail call i32 @llvm.bswap.i32(i32 %conv)
+ %conv1 = zext i32 %1 to i64
+ %add = add i64 %conv1, %0
+ %conv2 = trunc i64 %c to i16
+ %2 = tail call i16 @llvm.bswap.i16(i16 %conv2)
+ %conv3 = zext i16 %2 to i64
+ %add4 = add i64 %add, %conv3
+ %conv5 = trunc i64 %add4 to i32
+ ret i32 %conv5
+; CHECK-LABEL: bswap:
+; CHECK: bswap64 r1 # encoding: [0xdc,0x01,0x00,0x00,0x40,0x00,0x00,0x00]
+; CHECK: bswap32 r2 # encoding: [0xdc,0x02,0x00,0x00,0x20,0x00,0x00,0x00]
+; CHECK: add r2, r1 # encoding: [0x0f,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK: bswap16 r3 # encoding: [0xdc,0x03,0x00,0x00,0x10,0x00,0x00,0x00]
+; CHECK: add r2, r3 # encoding: [0x0f,0x32,0x00,0x00,0x00,0x00,0x00,0x00]
+}
+
+declare i64 @llvm.bswap.i64(i64) #1
+declare i32 @llvm.bswap.i32(i32) #1
+declare i16 @llvm.bswap.i16(i16) #1
diff --git a/test/CodeGen/BPF/sanity.ll b/test/CodeGen/BPF/sanity.ll
index b9040ef..09a6b65 100644
--- a/test/CodeGen/BPF/sanity.ll
+++ b/test/CodeGen/BPF/sanity.ll
@@ -106,7 +106,7 @@ define void @foo_printf() #1 {
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @foo_printf.fmt, i64 0, i64 0), i64 9, i32 1, i1 false)
; CHECK-LABEL: foo_printf:
; CHECK: ld_64 r1, 729618802566522216
- %2 = call i32 (i8*, ...)* @printf(i8* %1) #3
+ %2 = call i32 (i8*, ...) @printf(i8* %1) #3
ret void
}
diff --git a/test/CodeGen/CPP/2009-05-01-Long-Double.ll b/test/CodeGen/CPP/2009-05-01-Long-Double.ll
index ae18582..470303d 100644
--- a/test/CodeGen/CPP/2009-05-01-Long-Double.ll
+++ b/test/CodeGen/CPP/2009-05-01-Long-Double.ll
@@ -3,7 +3,7 @@
define x86_fp80 @some_func() nounwind {
entry:
%retval = alloca x86_fp80 ; <x86_fp80*> [#uses=2]
- %call = call i32 (...)* @other_func() ; <i32> [#uses=1]
+ %call = call i32 (...) @other_func() ; <i32> [#uses=1]
%conv = sitofp i32 %call to x86_fp80 ; <x86_fp80> [#uses=1]
store x86_fp80 %conv, x86_fp80* %retval
%0 = load x86_fp80, x86_fp80* %retval ; <x86_fp80> [#uses=1]
diff --git a/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll b/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
index a130085..7e402f5 100644
--- a/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
+++ b/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
@@ -31,11 +31,11 @@ entry:
br i1 %tmp.8, label %then, label %else
then: ; preds = %entry
- %tmp.11 = call i32 (i8*, ...)* @printf( i8* getelementptr ([6 x i8], [6 x i8]* @.str_1, i64 0, i64 0) ) ; <i32> [#uses=0]
+ %tmp.11 = call i32 (i8*, ...) @printf( i8* getelementptr ([6 x i8], [6 x i8]* @.str_1, i64 0, i64 0) ) ; <i32> [#uses=0]
br label %UnifiedExitNode
else: ; preds = %entry
- %tmp.13 = call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8], [7 x i8]* @.str_2, i64 0, i64 0) ) ; <i32> [#uses=0]
+ %tmp.13 = call i32 (i8*, ...) @printf( i8* getelementptr ([7 x i8], [7 x i8]* @.str_2, i64 0, i64 0) ) ; <i32> [#uses=0]
br label %UnifiedExitNode
UnifiedExitNode: ; preds = %else, %then
diff --git a/test/CodeGen/Generic/2003-07-07-BadLongConst.ll b/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
index e58fc97..928b57e 100644
--- a/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
+++ b/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
@@ -14,7 +14,7 @@ entry:
%tmp.11 = call i64 @getL( ) ; <i64> [#uses=2]
%tmp.5 = trunc i64 %tmp.11 to i32 ; <i32> [#uses=2]
%tmp.23 = and i64 %tmp.11, -4294967296 ; <i64> [#uses=2]
- %tmp.16 = call i32 (i8*, ...)* @printf( i8* getelementptr ([42 x i8], [42 x i8]* @.str_1, i64 0, i64 0), i32 %tmp.5, i32 %tmp.5, i64 %tmp.23, i64 %tmp.23 ) ; <i32> [#uses=0]
+ %tmp.16 = call i32 (i8*, ...) @printf( i8* getelementptr ([42 x i8], [42 x i8]* @.str_1, i64 0, i64 0), i32 %tmp.5, i32 %tmp.5, i64 %tmp.23, i64 %tmp.23 ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll b/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
index 72968d7..73ad186 100644
--- a/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
+++ b/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
@@ -28,7 +28,7 @@ entry:
define i32 @main() {
entry:
%result = call i32 @adj( i32 3, i32 2 ) ; <i32> [#uses=1]
- %tmp.0 = call i32 (i8*, ...)* @printf( i8* getelementptr ([30 x i8], [30 x i8]* @.str_1, i64 0, i64 0), i32 3, i32 2, i32 %result ) ; <i32> [#uses=0]
+ %tmp.0 = call i32 (i8*, ...) @printf( i8* getelementptr ([30 x i8], [30 x i8]* @.str_1, i64 0, i64 0), i32 3, i32 2, i32 %result ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll b/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
index 0d0c37b..010c0c5 100644
--- a/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
+++ b/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
@@ -28,8 +28,8 @@ loopentry: ; preds = %loopentry, %entry
%i = phi i64 [ 0, %entry ], [ %inc.i, %loopentry ] ; <i64> [#uses=3]
%cptr = getelementptr [6 x i8], [6 x i8]* @yy_ec, i64 0, i64 %i ; <i8*> [#uses=1]
%c = load i8, i8* %cptr ; <i8> [#uses=1]
- %ignore = call i32 (i8*, ...)* @printf( i8* getelementptr ([8 x i8], [8 x i8]* @.str_3, i64 0, i64 0), i64 %i ) ; <i32> [#uses=0]
- %ignore2 = call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @.str_4, i64 0, i64 0), i8 %c ) ; <i32> [#uses=0]
+ %ignore = call i32 (i8*, ...) @printf( i8* getelementptr ([8 x i8], [8 x i8]* @.str_3, i64 0, i64 0), i64 %i ) ; <i32> [#uses=0]
+ %ignore2 = call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @.str_4, i64 0, i64 0), i8 %c ) ; <i32> [#uses=0]
%inc.i = add i64 %i, 1 ; <i64> [#uses=2]
%done = icmp sle i64 %inc.i, 5 ; <i1> [#uses=1]
br i1 %done, label %loopentry, label %exit.1
diff --git a/test/CodeGen/Generic/2005-12-01-Crash.ll b/test/CodeGen/Generic/2005-12-01-Crash.ll
index 45d6204..e6ab9d2 100644
--- a/test/CodeGen/Generic/2005-12-01-Crash.ll
+++ b/test/CodeGen/Generic/2005-12-01-Crash.ll
@@ -11,7 +11,7 @@
define void @printArgsNoRet(i32 %a1, float %a2, i8 %a3, double %a4, i8* %a5, i32 %a6, float %a7, i8 %a8, double %a9, i8* %a10, i32 %a11, float %a12, i8 %a13, double %a14, i8* %a15) {
entry:
%tmp17 = sext i8 %a13 to i32 ; <i32> [#uses=1]
- %tmp23 = call i32 (i8*, ...)* @printf( i8* getelementptr ([29 x i8], [29 x i8]* @str2, i32 0, i64 0), i32 %a11, double 0.000000e+00, i32 %tmp17, double %a14, i32 0 ) ; <i32> [#uses=0]
+ %tmp23 = call i32 (i8*, ...) @printf( i8* getelementptr ([29 x i8], [29 x i8]* @str2, i32 0, i64 0), i32 %a11, double 0.000000e+00, i32 %tmp17, double %a14, i32 0 ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll b/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
index 57673bb..12a4011 100644
--- a/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
+++ b/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
@@ -18,7 +18,7 @@ bb.i: ; preds = %cond_next.i, %entry
br i1 false, label %cond_true.i31, label %cond_next.i
cond_true.i31: ; preds = %bb.i
- call void (i32, ...)* @fprintf( i32 0, i8* %tmp11, i8* null )
+ call void (i32, ...) @fprintf( i32 0, i8* %tmp11, i8* null )
ret void
cond_next.i: ; preds = %bb.i
diff --git a/test/CodeGen/Generic/2008-02-04-Ctlz.ll b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
index 27a37a9..3244e5c 100644
--- a/test/CodeGen/Generic/2008-02-04-Ctlz.ll
+++ b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
@@ -10,7 +10,7 @@ entry:
%tmp38 = trunc i64 %tmp37 to i32 ; <i32>:0 [#uses=1]
%tmp48 = trunc i64 %tmp47 to i32 ; <i32>:0 [#uses=1]
%tmp58 = trunc i64 %tmp57 to i32 ; <i32>:0 [#uses=1]
- %tmp40 = tail call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([14 x i8], [14 x i8]* @.str, i32 0, i32 0), i64 %arg, i32 %tmp38, i32 %tmp48, i32 %tmp58 ) nounwind ; <i32> [#uses=0]
+ %tmp40 = tail call i32 (i8*, ...) @printf( i8* noalias getelementptr ([14 x i8], [14 x i8]* @.str, i32 0, i32 0), i64 %arg, i32 %tmp38, i32 %tmp48, i32 %tmp58 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/2008-02-25-NegateZero.ll b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
index 35c0f20..14800ce 100644
--- a/test/CodeGen/Generic/2008-02-25-NegateZero.ll
+++ b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
@@ -7,7 +7,7 @@ entry:
%tmp106 = load float, float* null, align 4 ; <float> [#uses=1]
%tmp113 = fadd float %tmp98, %tmp106 ; <float> [#uses=1]
%tmp119 = fsub float %tmp113, 0.000000e+00 ; <float> [#uses=1]
- call void (i32, ...)* @foo( i32 0, float 0.000000e+00, float %tmp119 ) nounwind
+ call void (i32, ...) @foo( i32 0, float 0.000000e+00, float %tmp119 ) nounwind
ret void
}
diff --git a/test/CodeGen/Generic/ConstantExprLowering.ll b/test/CodeGen/Generic/ConstantExprLowering.ll
index 5c57b47..3119dfa 100644
--- a/test/CodeGen/Generic/ConstantExprLowering.ll
+++ b/test/CodeGen/Generic/ConstantExprLowering.ll
@@ -16,7 +16,7 @@ less: ; preds = %entry
not_less: ; preds = %less, %entry
%t2 = phi i32 [ sub (i32 ptrtoint (i32* @XA to i32), i32 ptrtoint (i32* @XB to i32)), %less ], [ sub (i32 ptrtoint (i32* @XA to i32), i32 ptrtoint (i32* @XB to i32)), %entry ] ; <i32> [#uses=1]
- %tmp.39 = call i32 (i8*, ...)* @printf( i8* getelementptr ([16 x i8], [16 x i8]* @.str_1, i64 0, i64 0), i32 %t2 ) ; <i32> [#uses=0]
+ %tmp.39 = call i32 (i8*, ...) @printf( i8* getelementptr ([16 x i8], [16 x i8]* @.str_1, i64 0, i64 0), i32 %t2 ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/Generic/PBQP.ll b/test/CodeGen/Generic/PBQP.ll
index 91fcfba..31fc4e6 100644
--- a/test/CodeGen/Generic/PBQP.ll
+++ b/test/CodeGen/Generic/PBQP.ll
@@ -2,23 +2,23 @@
define i32 @foo() {
entry:
- %call = tail call i32 (...)* @baz()
- %call1 = tail call i32 (...)* @baz()
- %call2 = tail call i32 (...)* @baz()
- %call3 = tail call i32 (...)* @baz()
- %call4 = tail call i32 (...)* @baz()
- %call5 = tail call i32 (...)* @baz()
- %call6 = tail call i32 (...)* @baz()
- %call7 = tail call i32 (...)* @baz()
- %call8 = tail call i32 (...)* @baz()
- %call9 = tail call i32 (...)* @baz()
- %call10 = tail call i32 (...)* @baz()
- %call11 = tail call i32 (...)* @baz()
- %call12 = tail call i32 (...)* @baz()
- %call13 = tail call i32 (...)* @baz()
- %call14 = tail call i32 (...)* @baz()
- %call15 = tail call i32 (...)* @baz()
- %call16 = tail call i32 (...)* @baz()
+ %call = tail call i32 (...) @baz()
+ %call1 = tail call i32 (...) @baz()
+ %call2 = tail call i32 (...) @baz()
+ %call3 = tail call i32 (...) @baz()
+ %call4 = tail call i32 (...) @baz()
+ %call5 = tail call i32 (...) @baz()
+ %call6 = tail call i32 (...) @baz()
+ %call7 = tail call i32 (...) @baz()
+ %call8 = tail call i32 (...) @baz()
+ %call9 = tail call i32 (...) @baz()
+ %call10 = tail call i32 (...) @baz()
+ %call11 = tail call i32 (...) @baz()
+ %call12 = tail call i32 (...) @baz()
+ %call13 = tail call i32 (...) @baz()
+ %call14 = tail call i32 (...) @baz()
+ %call15 = tail call i32 (...) @baz()
+ %call16 = tail call i32 (...) @baz()
%call17 = tail call i32 @bar(i32 %call, i32 %call1, i32 %call2, i32 %call3, i32 %call4, i32 %call5, i32 %call6, i32 %call7, i32 %call8, i32 %call9, i32 %call10, i32 %call11, i32 %call12, i32 %call13, i32 %call14, i32 %call15, i32 %call16)
ret i32 %call17
}
diff --git a/test/CodeGen/Generic/add-with-overflow-128.ll b/test/CodeGen/Generic/add-with-overflow-128.ll
index b091915..2a7456c 100644
--- a/test/CodeGen/Generic/add-with-overflow-128.ll
+++ b/test/CodeGen/Generic/add-with-overflow-128.ll
@@ -14,11 +14,11 @@ entry:
br i1 %obit, label %carry, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
ret i1 true
carry:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
}
diff --git a/test/CodeGen/Generic/add-with-overflow-24.ll b/test/CodeGen/Generic/add-with-overflow-24.ll
index 7edc1f8..6f06ae6 100644
--- a/test/CodeGen/Generic/add-with-overflow-24.ll
+++ b/test/CodeGen/Generic/add-with-overflow-24.ll
@@ -12,11 +12,11 @@ entry:
br i1 %obit, label %overflow, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
ret i1 true
overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
}
@@ -29,11 +29,11 @@ entry:
br i1 %obit, label %carry, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
ret i1 true
carry:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
}
diff --git a/test/CodeGen/Generic/add-with-overflow.ll b/test/CodeGen/Generic/add-with-overflow.ll
index 2204055..b6bbaa1 100644
--- a/test/CodeGen/Generic/add-with-overflow.ll
+++ b/test/CodeGen/Generic/add-with-overflow.ll
@@ -12,11 +12,11 @@ entry:
br i1 %obit, label %overflow, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
}
@@ -28,11 +28,11 @@ entry:
br i1 %obit, label %overflow, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
}
diff --git a/test/CodeGen/Generic/badarg6.ll b/test/CodeGen/Generic/badarg6.ll
index 7388bb4..34736ec 100644
--- a/test/CodeGen/Generic/badarg6.ll
+++ b/test/CodeGen/Generic/badarg6.ll
@@ -27,6 +27,6 @@ bb43: ; preds = %bb42, %bb25
%reg323 = phi double [ -1.000000e+00, %bb25 ], [ %reg317, %bb42 ] ; <double> [#uses=1]
%reg324 = phi double [ -1.000000e+00, %bb25 ], [ %reg318, %bb42 ] ; <double> [#uses=1]
%reg325 = phi double [ 1.000000e+00, %bb25 ], [ %reg319, %bb42 ] ; <double> [#uses=1]
- %reg609 = call i32 (i8*, ...)* @printf( i8* getelementptr ([44 x i8], [44 x i8]* @.LC12, i64 0, i64 0), double %reg325, double %reg324, double %reg323, double %reg322, double %reg321 ) ; <i32> [#uses=0]
+ %reg609 = call i32 (i8*, ...) @printf( i8* getelementptr ([44 x i8], [44 x i8]* @.LC12, i64 0, i64 0), double %reg325, double %reg324, double %reg323, double %reg322, double %reg321 ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/builtin-expect.ll b/test/CodeGen/Generic/builtin-expect.ll
index 2f76acf..def687e 100644
--- a/test/CodeGen/Generic/builtin-expect.ll
+++ b/test/CodeGen/Generic/builtin-expect.ll
@@ -14,7 +14,7 @@ entry:
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -43,7 +43,7 @@ entry:
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -71,7 +71,7 @@ entry:
br i1 %tobool1, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -100,7 +100,7 @@ entry:
br i1 %tobool2, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -127,7 +127,7 @@ entry:
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -206,7 +206,7 @@ entry:
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
diff --git a/test/CodeGen/Generic/cast-fp.ll b/test/CodeGen/Generic/cast-fp.ll
index 3b03096..a2611f5 100644
--- a/test/CodeGen/Generic/cast-fp.ll
+++ b/test/CodeGen/Generic/cast-fp.ll
@@ -12,22 +12,22 @@ declare i32 @printf(i8*, ...)
define i32 @main() {
%a = load double, double* @A ; <double> [#uses=4]
%a_fs = getelementptr [8 x i8], [8 x i8]* @a_fstr, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_fs, double %a ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_fs, double %a ) ; <i32>:1 [#uses=0]
%a_d2l = fptosi double %a to i64 ; <i64> [#uses=1]
%a_ls = getelementptr [10 x i8], [10 x i8]* @a_lstr, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_ls, i64 %a_d2l ) ; <i32>:2 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_ls, i64 %a_d2l ) ; <i32>:2 [#uses=0]
%a_d2i = fptosi double %a to i32 ; <i32> [#uses=2]
%a_ds = getelementptr [8 x i8], [8 x i8]* @a_dstr, i64 0, i64 0 ; <i8*> [#uses=3]
- call i32 (i8*, ...)* @printf( i8* %a_ds, i32 %a_d2i ) ; <i32>:3 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_ds, i32 %a_d2i ) ; <i32>:3 [#uses=0]
%a_d2sb = fptosi double %a to i8 ; <i8> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_ds, i8 %a_d2sb ) ; <i32>:4 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_ds, i8 %a_d2sb ) ; <i32>:4 [#uses=0]
%a_d2i2sb = trunc i32 %a_d2i to i8 ; <i8> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_ds, i8 %a_d2i2sb ) ; <i32>:5 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_ds, i8 %a_d2i2sb ) ; <i32>:5 [#uses=0]
%b = load i32, i32* @B ; <i32> [#uses=2]
%b_ds = getelementptr [8 x i8], [8 x i8]* @b_dstr, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %b_ds, i32 %b ) ; <i32>:6 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %b_ds, i32 %b ) ; <i32>:6 [#uses=0]
%b_i2d = sitofp i32 %b to double ; <double> [#uses=1]
%b_fs = getelementptr [8 x i8], [8 x i8]* @b_fstr, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %b_fs, double %b_i2d ) ; <i32>:7 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %b_fs, double %b_i2d ) ; <i32>:7 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/constindices.ll b/test/CodeGen/Generic/constindices.ll
index 3b43db0..837836f 100644
--- a/test/CodeGen/Generic/constindices.ll
+++ b/test/CodeGen/Generic/constindices.ll
@@ -39,6 +39,6 @@ define i32 @main() {
%dpi = fpext float %pi to double ; <double> [#uses=1]
%dfive = fpext float %five to double ; <double> [#uses=1]
%castFmt = getelementptr [44 x i8], [44 x i8]* @fmtArg, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %castFmt, double %dsqrtTwo, double %dexp, double %dpi, double %dfive ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %castFmt, double %dsqrtTwo, double %dexp, double %dpi, double %dfive ) ; <i32>:1 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/dbg_value.ll b/test/CodeGen/Generic/dbg_value.ll
index c5200d7..2ee667c 100644
--- a/test/CodeGen/Generic/dbg_value.ll
+++ b/test/CodeGen/Generic/dbg_value.ll
@@ -4,11 +4,11 @@
%0 = type { i32, i32 }
define void @t(%0*, i32, i32, i32, i32) nounwind {
- tail call void @llvm.dbg.value(metadata %0* %0, i64 0, metadata !0, metadata !MDExpression())
+ tail call void @llvm.dbg.value(metadata %0* %0, i64 0, metadata !0, metadata !MDExpression()), !dbg !MDLocation(scope: !MDSubprogram())
unreachable
}
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
; !0 should conform to the format of DIVariable.
-!0 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "a", arg: 0, scope: null)
+!0 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "a", arg: 0, scope: !MDSubprogram())
diff --git a/test/CodeGen/Generic/hello.ll b/test/CodeGen/Generic/hello.ll
index dff4791..a8147da 100644
--- a/test/CodeGen/Generic/hello.ll
+++ b/test/CodeGen/Generic/hello.ll
@@ -6,6 +6,6 @@ declare i32 @printf(i8*, ...)
define i32 @main() {
%s = getelementptr [7 x i8], [7 x i8]* @.str_1, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %s ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %s ) ; <i32>:1 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/negintconst.ll b/test/CodeGen/Generic/negintconst.ll
index 1cf69de..4c0a654 100644
--- a/test/CodeGen/Generic/negintconst.ll
+++ b/test/CodeGen/Generic/negintconst.ll
@@ -41,7 +41,7 @@ define i32 @main() {
%ioff.upgrd.1 = zext i32 %ioff to i64 ; <i64> [#uses=1]
%fptr = getelementptr %Results, %Results* %fval, i64 %ioff.upgrd.1 ; <%Results*> [#uses=1]
%castFmt = getelementptr [39 x i8], [39 x i8]* @fmtArg, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %castFmt, i32 %ioff, %Results* %fval, %Results* %fptr ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %castFmt, i32 %ioff, %Results* %fval, %Results* %fptr ) ; <i32>:1 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/overloaded-intrinsic-name.ll b/test/CodeGen/Generic/overloaded-intrinsic-name.ll
index aa6a031..915ba9f 100644
--- a/test/CodeGen/Generic/overloaded-intrinsic-name.ll
+++ b/test/CodeGen/Generic/overloaded-intrinsic-name.ll
@@ -12,29 +12,29 @@
; will serve the purpose.
; function and integer
-define i32* @test_iAny(i32* %v) {
- %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %v)
+define i32* @test_iAny(i32* %v) gc "statepoint-example" {
+ %tok = call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %v)
%v-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
ret i32* %v-new
}
; float
-define float* @test_fAny(float* %v) {
- %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, float* %v)
+define float* @test_fAny(float* %v) gc "statepoint-example" {
+ %tok = call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, float* %v)
%v-new = call float* @llvm.experimental.gc.relocate.p0f32(i32 %tok, i32 4, i32 4)
ret float* %v-new
}
; array of integers
-define [3 x i32]* @test_aAny([3 x i32]* %v) {
- %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %v)
+define [3 x i32]* @test_aAny([3 x i32]* %v) gc "statepoint-example" {
+ %tok = call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %v)
%v-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 4, i32 4)
ret [3 x i32]* %v-new
}
; vector of integers
-define <3 x i32>* @test_vAny(<3 x i32>* %v) {
- %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, <3 x i32>* %v)
+define <3 x i32>* @test_vAny(<3 x i32>* %v) gc "statepoint-example" {
+ %tok = call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, <3 x i32>* %v)
%v-new = call <3 x i32>* @llvm.experimental.gc.relocate.p0v3i32(i32 %tok, i32 4, i32 4)
ret <3 x i32>* %v-new
}
@@ -42,8 +42,8 @@ define <3 x i32>* @test_vAny(<3 x i32>* %v) {
%struct.test = type { i32, i1 }
; struct
-define %struct.test* @test_struct(%struct.test* %v) {
- %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, %struct.test* %v)
+define %struct.test* @test_struct(%struct.test* %v) gc "statepoint-example" {
+ %tok = call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, %struct.test* %v)
%v-new = call %struct.test* @llvm.experimental.gc.relocate.p0struct.test(i32 %tok, i32 4, i32 4)
ret %struct.test* %v-new
}
diff --git a/test/CodeGen/Generic/print-add.ll b/test/CodeGen/Generic/print-add.ll
index 553438a..0507aba 100644
--- a/test/CodeGen/Generic/print-add.ll
+++ b/test/CodeGen/Generic/print-add.ll
@@ -7,12 +7,12 @@ declare i32 @printf(i8*, ...)
define i32 @main() {
%f = getelementptr [4 x i8], [4 x i8]* @.str_1, i64 0, i64 0 ; <i8*> [#uses=3]
%d = add i32 1, 0 ; <i32> [#uses=3]
- call i32 (i8*, ...)* @printf( i8* %f, i32 %d ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %f, i32 %d ) ; <i32>:1 [#uses=0]
%e = add i32 38, 2 ; <i32> [#uses=2]
- call i32 (i8*, ...)* @printf( i8* %f, i32 %e ) ; <i32>:2 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %f, i32 %e ) ; <i32>:2 [#uses=0]
%g = add i32 %d, %d ; <i32> [#uses=1]
%h = add i32 %e, %g ; <i32> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %f, i32 %h ) ; <i32>:3 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %f, i32 %h ) ; <i32>:3 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/print-arith-fp.ll b/test/CodeGen/Generic/print-arith-fp.ll
index b00229c..93b158e 100644
--- a/test/CodeGen/Generic/print-arith-fp.ll
+++ b/test/CodeGen/Generic/print-arith-fp.ll
@@ -22,8 +22,8 @@ define i32 @main() {
%b = load double, double* @B ; <double> [#uses=12]
%a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
%b_s = getelementptr [8 x i8], [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_s, double %a ) ; <i32>:1 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %b_s, double %b ) ; <i32>:2 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_s, double %a ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %b_s, double %b ) ; <i32>:2 [#uses=0]
%add_r = fadd double %a, %b ; <double> [#uses=1]
%sub_r = fsub double %a, %b ; <double> [#uses=1]
%mul_r = fmul double %a, %b ; <double> [#uses=1]
@@ -34,11 +34,11 @@ define i32 @main() {
%mul_s = getelementptr [12 x i8], [12 x i8]* @mul_str, i64 0, i64 0 ; <i8*> [#uses=1]
%div_s = getelementptr [12 x i8], [12 x i8]* @div_str, i64 0, i64 0 ; <i8*> [#uses=1]
%rem_s = getelementptr [13 x i8], [13 x i8]* @rem_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %add_s, double %add_r ) ; <i32>:3 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %sub_s, double %sub_r ) ; <i32>:4 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %mul_s, double %mul_r ) ; <i32>:5 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %div_s, double %div_r ) ; <i32>:6 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %rem_s, double %rem_r ) ; <i32>:7 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %add_s, double %add_r ) ; <i32>:3 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %sub_s, double %sub_r ) ; <i32>:4 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %mul_s, double %mul_r ) ; <i32>:5 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %div_s, double %div_r ) ; <i32>:6 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %rem_s, double %rem_r ) ; <i32>:7 [#uses=0]
%lt_r = fcmp olt double %a, %b ; <i1> [#uses=1]
%le_r = fcmp ole double %a, %b ; <i1> [#uses=1]
%gt_r = fcmp ogt double %a, %b ; <i1> [#uses=1]
@@ -51,11 +51,11 @@ define i32 @main() {
%ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0 ; <i8*> [#uses=1]
%eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0 ; <i8*> [#uses=1]
%ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %lt_s, i1 %lt_r ) ; <i32>:8 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %le_s, i1 %le_r ) ; <i32>:9 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %gt_s, i1 %gt_r ) ; <i32>:10 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %ge_s, i1 %ge_r ) ; <i32>:11 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %eq_s, i1 %eq_r ) ; <i32>:12 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %ne_s, i1 %ne_r ) ; <i32>:13 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %lt_s, i1 %lt_r ) ; <i32>:8 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %le_s, i1 %le_r ) ; <i32>:9 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %gt_s, i1 %gt_r ) ; <i32>:10 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %ge_s, i1 %ge_r ) ; <i32>:11 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %eq_s, i1 %eq_r ) ; <i32>:12 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %ne_s, i1 %ne_r ) ; <i32>:13 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/print-arith-int.ll b/test/CodeGen/Generic/print-arith-int.ll
index 2e176e4..a5c519c 100644
--- a/test/CodeGen/Generic/print-arith-int.ll
+++ b/test/CodeGen/Generic/print-arith-int.ll
@@ -27,8 +27,8 @@ define i32 @main() {
%b = load i32, i32* @B ; <i32> [#uses=17]
%a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
%b_s = getelementptr [8 x i8], [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a ) ; <i32>:1 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %b_s, i32 %b ) ; <i32>:2 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_s, i32 %a ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %b_s, i32 %b ) ; <i32>:2 [#uses=0]
%add_r = add i32 %a, %b ; <i32> [#uses=1]
%sub_r = sub i32 %a, %b ; <i32> [#uses=1]
%mul_r = mul i32 %a, %b ; <i32> [#uses=1]
@@ -39,11 +39,11 @@ define i32 @main() {
%mul_s = getelementptr [12 x i8], [12 x i8]* @mul_str, i64 0, i64 0 ; <i8*> [#uses=1]
%div_s = getelementptr [12 x i8], [12 x i8]* @div_str, i64 0, i64 0 ; <i8*> [#uses=1]
%rem_s = getelementptr [13 x i8], [13 x i8]* @rem_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %add_s, i32 %add_r ) ; <i32>:3 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %sub_s, i32 %sub_r ) ; <i32>:4 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %mul_s, i32 %mul_r ) ; <i32>:5 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %div_s, i32 %div_r ) ; <i32>:6 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %rem_s, i32 %rem_r ) ; <i32>:7 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %add_s, i32 %add_r ) ; <i32>:3 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %sub_s, i32 %sub_r ) ; <i32>:4 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %mul_s, i32 %mul_r ) ; <i32>:5 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %div_s, i32 %div_r ) ; <i32>:6 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %rem_s, i32 %rem_r ) ; <i32>:7 [#uses=0]
%lt_r = icmp slt i32 %a, %b ; <i1> [#uses=1]
%le_r = icmp sle i32 %a, %b ; <i1> [#uses=1]
%gt_r = icmp sgt i32 %a, %b ; <i1> [#uses=1]
@@ -56,12 +56,12 @@ define i32 @main() {
%ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0 ; <i8*> [#uses=1]
%eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0 ; <i8*> [#uses=1]
%ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %lt_s, i1 %lt_r ) ; <i32>:8 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %le_s, i1 %le_r ) ; <i32>:9 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %gt_s, i1 %gt_r ) ; <i32>:10 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %ge_s, i1 %ge_r ) ; <i32>:11 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %eq_s, i1 %eq_r ) ; <i32>:12 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %ne_s, i1 %ne_r ) ; <i32>:13 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %lt_s, i1 %lt_r ) ; <i32>:8 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %le_s, i1 %le_r ) ; <i32>:9 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %gt_s, i1 %gt_r ) ; <i32>:10 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %ge_s, i1 %ge_r ) ; <i32>:11 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %eq_s, i1 %eq_r ) ; <i32>:12 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %ne_s, i1 %ne_r ) ; <i32>:13 [#uses=0]
%and_r = and i32 %a, %b ; <i32> [#uses=1]
%or_r = or i32 %a, %b ; <i32> [#uses=1]
%xor_r = xor i32 %a, %b ; <i32> [#uses=1]
@@ -75,10 +75,10 @@ define i32 @main() {
%xor_s = getelementptr [12 x i8], [12 x i8]* @xor_str, i64 0, i64 0 ; <i8*> [#uses=1]
%shl_s = getelementptr [13 x i8], [13 x i8]* @shl_str, i64 0, i64 0 ; <i8*> [#uses=1]
%shr_s = getelementptr [13 x i8], [13 x i8]* @shr_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %and_s, i32 %and_r ) ; <i32>:14 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %or_s, i32 %or_r ) ; <i32>:15 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %xor_s, i32 %xor_r ) ; <i32>:16 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %shl_s, i32 %shl_r ) ; <i32>:17 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %shr_s, i32 %shr_r ) ; <i32>:18 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %and_s, i32 %and_r ) ; <i32>:14 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %or_s, i32 %or_r ) ; <i32>:15 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %xor_s, i32 %xor_r ) ; <i32>:16 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %shl_s, i32 %shl_r ) ; <i32>:17 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %shr_s, i32 %shr_r ) ; <i32>:18 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/print-int.ll b/test/CodeGen/Generic/print-int.ll
index 0afc0e9..85b40c0 100644
--- a/test/CodeGen/Generic/print-int.ll
+++ b/test/CodeGen/Generic/print-int.ll
@@ -7,7 +7,7 @@ declare i32 @printf(i8*, ...)
define i32 @main() {
%f = getelementptr [4 x i8], [4 x i8]* @.str_1, i64 0, i64 0 ; <i8*> [#uses=1]
%d = add i32 0, 0 ; <i32> [#uses=1]
- %tmp.0 = call i32 (i8*, ...)* @printf( i8* %f, i32 %d ) ; <i32> [#uses=0]
+ %tmp.0 = call i32 (i8*, ...) @printf( i8* %f, i32 %d ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/print-mul-exp.ll b/test/CodeGen/Generic/print-mul-exp.ll
index a08333d..91c8147 100644
--- a/test/CodeGen/Generic/print-mul-exp.ll
+++ b/test/CodeGen/Generic/print-mul-exp.ll
@@ -10,7 +10,7 @@ define i32 @main() {
%a = load i32, i32* @A ; <i32> [#uses=21]
%a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
%a_mul_s = getelementptr [13 x i8], [13 x i8]* @a_mul_str, i64 0, i64 0 ; <i8*> [#uses=20]
- call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_s, i32 %a ) ; <i32>:1 [#uses=0]
%r_0 = mul i32 %a, 0 ; <i32> [#uses=1]
%r_1 = mul i32 %a, 1 ; <i32> [#uses=1]
%r_2 = mul i32 %a, 2 ; <i32> [#uses=1]
@@ -31,25 +31,25 @@ define i32 @main() {
%r_17 = mul i32 %a, 17 ; <i32> [#uses=1]
%r_18 = mul i32 %a, 18 ; <i32> [#uses=1]
%r_19 = mul i32 %a, 19 ; <i32> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 0, i32 %r_0 ) ; <i32>:2 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 1, i32 %r_1 ) ; <i32>:3 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 2, i32 %r_2 ) ; <i32>:4 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 3, i32 %r_3 ) ; <i32>:5 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 4, i32 %r_4 ) ; <i32>:6 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 5, i32 %r_5 ) ; <i32>:7 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 6, i32 %r_6 ) ; <i32>:8 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 7, i32 %r_7 ) ; <i32>:9 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 8, i32 %r_8 ) ; <i32>:10 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 9, i32 %r_9 ) ; <i32>:11 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 10, i32 %r_10 ) ; <i32>:12 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 11, i32 %r_11 ) ; <i32>:13 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 12, i32 %r_12 ) ; <i32>:14 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 13, i32 %r_13 ) ; <i32>:15 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 14, i32 %r_14 ) ; <i32>:16 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 15, i32 %r_15 ) ; <i32>:17 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 16, i32 %r_16 ) ; <i32>:18 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 17, i32 %r_17 ) ; <i32>:19 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 18, i32 %r_18 ) ; <i32>:20 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 19, i32 %r_19 ) ; <i32>:21 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 0, i32 %r_0 ) ; <i32>:2 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 1, i32 %r_1 ) ; <i32>:3 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 2, i32 %r_2 ) ; <i32>:4 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 3, i32 %r_3 ) ; <i32>:5 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 4, i32 %r_4 ) ; <i32>:6 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 5, i32 %r_5 ) ; <i32>:7 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 6, i32 %r_6 ) ; <i32>:8 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 7, i32 %r_7 ) ; <i32>:9 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 8, i32 %r_8 ) ; <i32>:10 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 9, i32 %r_9 ) ; <i32>:11 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 10, i32 %r_10 ) ; <i32>:12 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 11, i32 %r_11 ) ; <i32>:13 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 12, i32 %r_12 ) ; <i32>:14 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 13, i32 %r_13 ) ; <i32>:15 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 14, i32 %r_14 ) ; <i32>:16 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 15, i32 %r_15 ) ; <i32>:17 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 16, i32 %r_16 ) ; <i32>:18 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 17, i32 %r_17 ) ; <i32>:19 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 18, i32 %r_18 ) ; <i32>:20 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 19, i32 %r_19 ) ; <i32>:21 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/print-mul.ll b/test/CodeGen/Generic/print-mul.ll
index 06f2b40..4b60d75 100644
--- a/test/CodeGen/Generic/print-mul.ll
+++ b/test/CodeGen/Generic/print-mul.ll
@@ -15,14 +15,14 @@ entry:
%a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
%b_s = getelementptr [8 x i8], [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
%a_mul_s = getelementptr [13 x i8], [13 x i8]* @a_mul_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a ) ; <i32>:0 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %b_s, i32 %b ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_s, i32 %a ) ; <i32>:0 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %b_s, i32 %b ) ; <i32>:1 [#uses=0]
br label %shl_test
shl_test: ; preds = %shl_test, %entry
%s = phi i32 [ 0, %entry ], [ %s_inc, %shl_test ] ; <i32> [#uses=4]
%result = mul i32 %a, %s ; <i32> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 %s, i32 %result ) ; <i32>:2 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 %s, i32 %result ) ; <i32>:2 [#uses=0]
%s_inc = add i32 %s, 1 ; <i32> [#uses=1]
%done = icmp eq i32 %s, 256 ; <i1> [#uses=1]
br i1 %done, label %fini, label %shl_test
diff --git a/test/CodeGen/Generic/print-shift.ll b/test/CodeGen/Generic/print-shift.ll
index af14f77..56b3ec1 100644
--- a/test/CodeGen/Generic/print-shift.ll
+++ b/test/CodeGen/Generic/print-shift.ll
@@ -15,15 +15,15 @@ entry:
%a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
%b_s = getelementptr [8 x i8], [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
%a_shl_s = getelementptr [14 x i8], [14 x i8]* @a_shl_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a ) ; <i32>:0 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %b_s, i32 %b ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_s, i32 %a ) ; <i32>:0 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %b_s, i32 %b ) ; <i32>:1 [#uses=0]
br label %shl_test
shl_test: ; preds = %shl_test, %entry
%s = phi i8 [ 0, %entry ], [ %s_inc, %shl_test ] ; <i8> [#uses=4]
%shift.upgrd.1 = zext i8 %s to i32 ; <i32> [#uses=1]
%result = shl i32 %a, %shift.upgrd.1 ; <i32> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_shl_s, i8 %s, i32 %result ) ; <i32>:2 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_shl_s, i8 %s, i32 %result ) ; <i32>:2 [#uses=0]
%s_inc = add i8 %s, 1 ; <i8> [#uses=1]
%done = icmp eq i8 %s, 32 ; <i1> [#uses=1]
br i1 %done, label %fini, label %shl_test
diff --git a/test/CodeGen/Hexagon/adde.ll b/test/CodeGen/Hexagon/adde.ll
index 6d060c1..5a8345c 100644
--- a/test/CodeGen/Hexagon/adde.ll
+++ b/test/CodeGen/Hexagon/adde.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s
; CHECK: r{{[0-9]+:[0-9]+}} = #0
; CHECK: r{{[0-9]+:[0-9]+}} = #1
diff --git a/test/CodeGen/Hexagon/expand-condsets-basic.ll b/test/CodeGen/Hexagon/expand-condsets-basic.ll
new file mode 100644
index 0000000..16fe8af
--- /dev/null
+++ b/test/CodeGen/Hexagon/expand-condsets-basic.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: if{{.*}}add
+; CHECK: if{{.*}}sub
+
+define i32 @foo (i1 %a, i32 %b, i32 %c, i32 %d) nounwind {
+ %1 = add i32 %b, %d
+ %2 = sub i32 %c, %d
+ %3 = select i1 %a, i32 %1, i32 %2
+ ret i32 %3
+}
+
diff --git a/test/CodeGen/Hexagon/expand-condsets-rm-segment.ll b/test/CodeGen/Hexagon/expand-condsets-rm-segment.ll
new file mode 100644
index 0000000..cde7e6a
--- /dev/null
+++ b/test/CodeGen/Hexagon/expand-condsets-rm-segment.ll
@@ -0,0 +1,131 @@
+; RUN: llc -O2 < %s
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon-unknown--elf"
+
+%struct.cpumask = type { [1 x i32] }
+%struct.load_weight = type { i32, i32 }
+
+@sysctl_sched_latency = global i32 6000000, align 4
+@normalized_sysctl_sched_latency = global i32 6000000, align 4
+@sysctl_sched_tunable_scaling = global i8 1, align 1
+@sysctl_sched_min_granularity = global i32 750000, align 4
+@normalized_sysctl_sched_min_granularity = global i32 750000, align 4
+@sysctl_sched_wakeup_granularity = global i32 1000000, align 4
+@normalized_sysctl_sched_wakeup_granularity = global i32 1000000, align 4
+@sysctl_sched_migration_cost = constant i32 500000, align 4
+@sysctl_sched_shares_window = global i32 10000000, align 4
+@sysctl_sched_child_runs_first = common global i32 0, align 4
+@cpu_online_mask = external constant %struct.cpumask*
+
+; Function Attrs: noinline nounwind
+define void @sched_init_granularity() #0 {
+entry:
+ tail call fastcc void @update_sysctl()
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define internal fastcc void @update_sysctl() #0 {
+entry:
+ %call = tail call i32 @get_update_sysctl_factor()
+ %0 = load i32, i32* @normalized_sysctl_sched_min_granularity, align 4, !tbaa !1
+ %mul = mul i32 %0, %call
+ store i32 %mul, i32* @sysctl_sched_min_granularity, align 4, !tbaa !1
+ %1 = load i32, i32* @normalized_sysctl_sched_latency, align 4, !tbaa !1
+ %mul1 = mul i32 %1, %call
+ store i32 %mul1, i32* @sysctl_sched_latency, align 4, !tbaa !1
+ %2 = load i32, i32* @normalized_sysctl_sched_wakeup_granularity, align 4, !tbaa !1
+ %mul2 = mul i32 %2, %call
+ store i32 %mul2, i32* @sysctl_sched_wakeup_granularity, align 4, !tbaa !1
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define i32 @calc_delta_mine(i32 %delta_exec, i32 %weight, %struct.load_weight* nocapture %lw) #0 {
+entry:
+ %cmp = icmp ugt i32 %weight, 1
+ %conv = zext i32 %delta_exec to i64
+ br i1 %cmp, label %if.then, label %if.end, !prof !5
+
+if.then: ; preds = %entry
+ %conv2 = zext i32 %weight to i64
+ %mul = mul i64 %conv2, %conv
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %tmp.0 = phi i64 [ %mul, %if.then ], [ %conv, %entry ]
+ %inv_weight = getelementptr inbounds %struct.load_weight, %struct.load_weight* %lw, i32 0, i32 1
+ %0 = load i32, i32* %inv_weight, align 4, !tbaa !6
+ %tobool4 = icmp eq i32 %0, 0
+ br i1 %tobool4, label %if.then5, label %if.end22
+
+if.then5: ; preds = %if.end
+ %weight7 = getelementptr inbounds %struct.load_weight, %struct.load_weight* %lw, i32 0, i32 0
+ %1 = load i32, i32* %weight7, align 4, !tbaa !9
+ %lnot9 = icmp eq i32 %1, 0
+ br i1 %lnot9, label %if.then17, label %if.else19, !prof !10
+
+if.then17: ; preds = %if.then5
+ store i32 -1, i32* %inv_weight, align 4, !tbaa !6
+ br label %if.end22
+
+if.else19: ; preds = %if.then5
+ %div = udiv i32 -1, %1
+ store i32 %div, i32* %inv_weight, align 4, !tbaa !6
+ br label %if.end22
+
+if.end22: ; preds = %if.end, %if.then17, %if.else19
+ %2 = phi i32 [ %0, %if.end ], [ -1, %if.then17 ], [ %div, %if.else19 ]
+ %cmp23 = icmp ugt i64 %tmp.0, 4294967295
+ br i1 %cmp23, label %if.then31, label %if.else37, !prof !10
+
+if.then31: ; preds = %if.end22
+ %add = add i64 %tmp.0, 32768
+ %shr = lshr i64 %add, 16
+ %conv33 = zext i32 %2 to i64
+ %mul34 = mul i64 %conv33, %shr
+ %add35 = add i64 %mul34, 32768
+ %shr36 = lshr i64 %add35, 16
+ br label %if.end43
+
+if.else37: ; preds = %if.end22
+ %conv39 = zext i32 %2 to i64
+ %mul40 = mul i64 %conv39, %tmp.0
+ %add41 = add i64 %mul40, 2147483648
+ %shr42 = lshr i64 %add41, 32
+ br label %if.end43
+
+if.end43: ; preds = %if.else37, %if.then31
+ %tmp.1 = phi i64 [ %shr36, %if.then31 ], [ %shr42, %if.else37 ]
+ %cmp49 = icmp ult i64 %tmp.1, 2147483647
+ %3 = trunc i64 %tmp.1 to i32
+ %conv51 = select i1 %cmp49, i32 %3, i32 2147483647
+ ret i32 %conv51
+}
+
+declare i32 @get_update_sysctl_factor() #0
+declare i32 @__bitmap_weight(i32*, i32) #1
+
+attributes #0 = { noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!"branch_weights", i32 64, i32 4}
+!6 = !{!7, !8, i64 4}
+!7 = !{!"load_weight", !8, i64 0, !8, i64 4}
+!8 = !{!"long", !3, i64 0}
+!9 = !{!7, !8, i64 0}
+!10 = !{!"branch_weights", i32 4, i32 64}
+!11 = !{!12, !12, i64 0}
+!12 = !{!"any pointer", !3, i64 0}
+!13 = !{!3, !3, i64 0}
+!14 = !{i32 45854, i32 45878}
diff --git a/test/CodeGen/Hexagon/expand-condsets-undef.ll b/test/CodeGen/Hexagon/expand-condsets-undef.ll
new file mode 100644
index 0000000..85e72aa
--- /dev/null
+++ b/test/CodeGen/Hexagon/expand-condsets-undef.ll
@@ -0,0 +1,28 @@
+; RUN: llc -O2 < %s
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-p:32:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon"
+
+; Function Attrs: nounwind optsize ssp
+define internal fastcc void @foo() nounwind {
+if.else473:
+ %0 = load i64, i64* undef, align 8
+ %sub = sub nsw i64 undef, %0
+ %conv476 = sitofp i64 %sub to double
+ %mul477 = fmul double %conv476, 0x3F50624DE0000000
+ br i1 undef, label %cond.true540, label %cond.end548
+
+cond.true540:
+ %1 = fptrunc double %mul477 to float
+ %2 = fptosi float %1 to i32
+ br label %cond.end548
+
+cond.end548:
+ %cond549 = phi i32 [ %2, %cond.true540 ], [ undef, %if.else473 ]
+ call void @bar(i32 %cond549) nounwind
+ unreachable
+}
+
+declare void @bar(i32) nounwind
+
diff --git a/test/CodeGen/Hexagon/i16_VarArg.ll b/test/CodeGen/Hexagon/i16_VarArg.ll
index 41cecec..ba98f62 100644
--- a/test/CodeGen/Hexagon/i16_VarArg.ll
+++ b/test/CodeGen/Hexagon/i16_VarArg.ll
@@ -35,6 +35,6 @@ define i32 @main() {
%ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0
%eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0
%ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0
- call i32 (i8*, ...)* @printf( i8* %lt_s, i16 %val1 )
+ call i32 (i8*, ...) @printf( i8* %lt_s, i16 %val1 )
ret i32 0
}
diff --git a/test/CodeGen/Hexagon/i1_VarArg.ll b/test/CodeGen/Hexagon/i1_VarArg.ll
index 8b5625c..1908b3c 100644
--- a/test/CodeGen/Hexagon/i1_VarArg.ll
+++ b/test/CodeGen/Hexagon/i1_VarArg.ll
@@ -34,11 +34,11 @@ define i32 @main() {
%ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0
%eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0
%ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0
- call i32 (i8*, ...)* @printf( i8* %lt_s, i1 %lt_r )
- call i32 (i8*, ...)* @printf( i8* %le_s, i1 %le_r )
- call i32 (i8*, ...)* @printf( i8* %gt_s, i1 %gt_r )
- call i32 (i8*, ...)* @printf( i8* %ge_s, i1 %ge_r )
- call i32 (i8*, ...)* @printf( i8* %eq_s, i1 %eq_r )
- call i32 (i8*, ...)* @printf( i8* %ne_s, i1 %ne_r )
+ call i32 (i8*, ...) @printf( i8* %lt_s, i1 %lt_r )
+ call i32 (i8*, ...) @printf( i8* %le_s, i1 %le_r )
+ call i32 (i8*, ...) @printf( i8* %gt_s, i1 %gt_r )
+ call i32 (i8*, ...) @printf( i8* %ge_s, i1 %ge_r )
+ call i32 (i8*, ...) @printf( i8* %eq_s, i1 %eq_r )
+ call i32 (i8*, ...) @printf( i8* %ne_s, i1 %ne_r )
ret i32 0
}
diff --git a/test/CodeGen/Hexagon/i8_VarArg.ll b/test/CodeGen/Hexagon/i8_VarArg.ll
index 7283ba4..c40a6a9 100644
--- a/test/CodeGen/Hexagon/i8_VarArg.ll
+++ b/test/CodeGen/Hexagon/i8_VarArg.ll
@@ -35,6 +35,6 @@ define i32 @main() {
%ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0
%eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0
%ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0
- call i32 (i8*, ...)* @printf( i8* %lt_s, i8 %val1 )
+ call i32 (i8*, ...) @printf( i8* %lt_s, i8 %val1 )
ret i32 0
}
diff --git a/test/CodeGen/Hexagon/sube.ll b/test/CodeGen/Hexagon/sube.ll
index 735ac9e..1a78822 100644
--- a/test/CodeGen/Hexagon/sube.ll
+++ b/test/CodeGen/Hexagon/sube.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s
; CHECK: r{{[0-9]+:[0-9]+}} = #0
; CHECK: r{{[0-9]+:[0-9]+}} = #1
diff --git a/test/CodeGen/Hexagon/tail-call-mem-intrinsics.ll b/test/CodeGen/Hexagon/tail-call-mem-intrinsics.ll
new file mode 100644
index 0000000..90fb75e
--- /dev/null
+++ b/test/CodeGen/Hexagon/tail-call-mem-intrinsics.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK-LABEL: tail_memcpy:
+; CHECK: jump memcpy
+define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memmove:
+; CHECK: jump memmove
+define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memset:
+; CHECK: jump memset
+define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/Inputs/DbgValueOtherTargets.ll b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
index fc44a98..0af0293 100644
--- a/test/CodeGen/Inputs/DbgValueOtherTargets.ll
+++ b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
@@ -20,7 +20,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!3 = !MDSubroutineType(types: !4)
!4 = !{!5}
!5 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !{i32 0}
+!6 = !{}
!7 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3, scope: !8, file: !1, type: !5)
!8 = distinct !MDLexicalBlock(line: 2, column: 12, file: !12, scope: !0)
!9 = !MDLocation(line: 3, column: 11, scope: !8)
diff --git a/test/CodeGen/Mips/Fast-ISel/logopm.ll b/test/CodeGen/Mips/Fast-ISel/logopm.ll
index cfb751f..0f0c3bf 100644
--- a/test/CodeGen/Mips/Fast-ISel/logopm.ll
+++ b/test/CodeGen/Mips/Fast-ISel/logopm.ll
@@ -68,11 +68,12 @@ entry:
; Function Attrs: noinline nounwind
define void @andUb1() #0 {
+; clang uses i8 constants for booleans, so we test with an i8 1.
entry:
- %0 = load i8, i8* @ub1, align 1, !tbaa !2
- %conv = trunc i8 %0 to i1
- %and = and i1 %conv, 1
- %conv1 = zext i1 %and to i8
+ %x = load i8, i8* @ub1, align 1, !tbaa !2
+ %and = and i8 %x, 1
+ %conv = trunc i8 %and to i1
+ %conv1 = zext i1 %conv to i8
store i8 %conv1, i8* @ub, align 1, !tbaa !2
; CHECK-LABEL: .ent andUb1
; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
@@ -138,10 +139,10 @@ entry:
; Function Attrs: noinline nounwind
define void @orUb1() #0 {
entry:
- %0 = load i8, i8* @ub1, align 1, !tbaa !2
- %conv = trunc i8 %0 to i1
- %or = or i1 %conv, 1
- %conv1 = zext i1 %or to i8
+ %x = load i8, i8* @ub1, align 1, !tbaa !2
+ %or = or i8 %x, 1
+ %conv = trunc i8 %or to i1
+ %conv1 = zext i1 %conv to i8
store i8 %conv1, i8* @ub, align 1, !tbaa !2
; CHECK-LABEL: .ent orUb1
; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
@@ -208,10 +209,10 @@ entry:
; Function Attrs: noinline nounwind
define void @xorUb1() #0 {
entry:
- %0 = load i8, i8* @ub1, align 1, !tbaa !2
- %conv = trunc i8 %0 to i1
- %xor = xor i1 %conv, 1
- %conv1 = zext i1 %xor to i8
+ %x = load i8, i8* @ub1, align 1, !tbaa !2
+ %xor = xor i8 1, %x
+ %conv = trunc i8 %xor to i1
+ %conv1 = zext i1 %conv to i8
store i8 %conv1, i8* @ub, align 1, !tbaa !2
; CHECK-LABEL: .ent xorUb1
; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
diff --git a/test/CodeGen/Mips/adjust-callstack-sp.ll b/test/CodeGen/Mips/adjust-callstack-sp.ll
new file mode 100644
index 0000000..8c61a65
--- /dev/null
+++ b/test/CodeGen/Mips/adjust-callstack-sp.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=mips -mcpu=mips16 | FileCheck %s -check-prefix=M16
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips3 | FileCheck %s -check-prefix=GP64
+; RUN: llc < %s -march=mips -mcpu=mips64 | FileCheck %s -check-prefix=GP64
+; RUN: llc < %s -march=mips -mcpu=mips64r6 | FileCheck %s -check-prefix=GP64
+
+declare void @bar(i32*)
+
+define void @foo(i32 %sz) {
+ ; ALL-LABEL: foo:
+
+ ; M16-NOT: addiu $sp, 0 # 16 bit inst
+ ; GP32-NOT: addiu $sp, $sp, 0
+ ; GP64-NOT: daddiu $sp, $sp, 0
+ %a = alloca i32, i32 %sz
+ call void @bar(i32* %a)
+ ret void
+}
diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll
index 8967d57..747a136 100644
--- a/test/CodeGen/Mips/alloca.ll
+++ b/test/CodeGen/Mips/alloca.ll
@@ -76,7 +76,7 @@ if.end: ; preds = %if.else, %if.then
%arrayidx24 = getelementptr inbounds i8, i8* %tmp1, i32 24
%7 = bitcast i8* %arrayidx24 to i32*
%tmp25 = load i32, i32* %7, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str, i32 0, i32 0), i32 %tmp7, i32 %tmp10, i32 %tmp13, i32 %tmp16, i32 %tmp19, i32 %tmp22, i32 %tmp25) nounwind
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str, i32 0, i32 0), i32 %tmp7, i32 %tmp10, i32 %tmp13, i32 %tmp16, i32 %tmp19, i32 %tmp22, i32 %tmp25) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll
index 4b5d097..d5ecaae 100644
--- a/test/CodeGen/Mips/analyzebranch.ll
+++ b/test/CodeGen/Mips/analyzebranch.ll
@@ -60,7 +60,7 @@ if.then: ; preds = %entry
unreachable
if.end: ; preds = %entry
- tail call void (...)* @f2() nounwind
+ tail call void (...) @f2() nounwind
ret void
}
diff --git a/test/CodeGen/Mips/and1.ll b/test/CodeGen/Mips/and1.ll
index be9ba3e..57076a4 100644
--- a/test/CodeGen/Mips/and1.ll
+++ b/test/CodeGen/Mips/and1.ll
@@ -10,7 +10,7 @@ entry:
%1 = load i32, i32* @y, align 4
%and = and i32 %0, %1
; 16: and ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %and)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %and)
ret i32 0
}
diff --git a/test/CodeGen/Mips/atomicops.ll b/test/CodeGen/Mips/atomicops.ll
index 920357d..0ff9f5c 100644
--- a/test/CodeGen/Mips/atomicops.ll
+++ b/test/CodeGen/Mips/atomicops.ll
@@ -19,14 +19,14 @@ entry:
%0 = atomicrmw add i32* %x, i32 1 seq_cst
%add.i = add nsw i32 %0, 2
%1 = load volatile i32, i32* %x, align 4
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %add.i, i32 %1) nounwind
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %add.i, i32 %1) nounwind
%pair = cmpxchg i32* %x, i32 1, i32 2 seq_cst seq_cst
%2 = extractvalue { i32, i1 } %pair, 0
%3 = load volatile i32, i32* %x, align 4
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3) nounwind
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3) nounwind
%4 = atomicrmw xchg i32* %x, i32 1 seq_cst
%5 = load volatile i32, i32* %x, align 4
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %4, i32 %5) nounwind
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %4, i32 %5) nounwind
; 16-LABEL: main:
; 16: lw ${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}})
; 16: lw ${{[0-9]+}}, %call16(__sync_fetch_and_add_4)(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/cache-intrinsic.ll b/test/CodeGen/Mips/cache-intrinsic.ll
index 461c181..987032e 100644
--- a/test/CodeGen/Mips/cache-intrinsic.ll
+++ b/test/CodeGen/Mips/cache-intrinsic.ll
@@ -10,10 +10,10 @@ define i32 @main() {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
%call1 = call i8* @strcpy(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str1, i32 0, i32 0)) #3
call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds (i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i32 32)) #3
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
ret i32 0
}
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
index abb3601..ba3aeb5 100644
--- a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
+++ b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
@@ -144,7 +144,7 @@ entry:
%1 = bitcast %struct.SmallStruct_1b* %0 to { i8 }*
%2 = getelementptr { i8 }, { i8 }* %1, i32 0, i32 0
%3 = load i8, i8* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8 inreg %3)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8 inreg %3)
ret void
; CHECK-LABEL: smallStruct_1b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
@@ -158,7 +158,7 @@ entry:
%1 = bitcast %struct.SmallStruct_2b* %0 to { i16 }*
%2 = getelementptr { i16 }, { i16 }* %1, i32 0, i32 0
%3 = load i16, i16* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i16 inreg %3)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i16 inreg %3)
ret void
; CHECK-LABEL: smallStruct_2b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 48
@@ -175,7 +175,7 @@ entry:
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 3, i32 0, i1 false)
%3 = getelementptr { i24 }, { i24 }* %.coerce, i32 0, i32 0
%4 = load i24, i24* %3, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i24 inreg %4)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i24 inreg %4)
ret void
; CHECK-LABEL: smallStruct_3b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 40
@@ -191,7 +191,7 @@ entry:
%1 = bitcast %struct.SmallStruct_4b* %0 to { i32 }*
%2 = getelementptr { i32 }, { i32 }* %1, i32 0, i32 0
%3 = load i32, i32* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 inreg %3)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 inreg %3)
ret void
; CHECK-LABEL: smallStruct_4b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 32
@@ -208,7 +208,7 @@ entry:
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 5, i32 0, i1 false)
%3 = getelementptr { i40 }, { i40 }* %.coerce, i32 0, i32 0
%4 = load i40, i40* %3, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i40 inreg %4)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i40 inreg %4)
ret void
; CHECK-LABEL: smallStruct_5b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 24
@@ -225,7 +225,7 @@ entry:
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
%3 = getelementptr { i48 }, { i48 }* %.coerce, i32 0, i32 0
%4 = load i48, i48* %3, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
ret void
; CHECK-LABEL: smallStruct_6b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16
@@ -242,7 +242,7 @@ entry:
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 7, i32 0, i1 false)
%3 = getelementptr { i56 }, { i56 }* %.coerce, i32 0, i32 0
%4 = load i56, i56* %3, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i56 inreg %4)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i56 inreg %4)
ret void
; CHECK-LABEL: smallStruct_7b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 8
@@ -256,7 +256,7 @@ entry:
%1 = bitcast %struct.SmallStruct_8b* %0 to { i64 }*
%2 = getelementptr { i64 }, { i64 }* %1, i32 0, i32 0
%3 = load i64, i64* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
ret void
; CHECK-LABEL: smallStruct_8b:
; CHECK-NOT: dsll
@@ -275,7 +275,7 @@ entry:
%4 = load i64, i64* %3, align 1
%5 = getelementptr { i64, i8 }, { i64, i8 }* %.coerce, i32 0, i32 1
%6 = load i8, i8* %5, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 inreg %4, i8 inreg %6)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 inreg %4, i8 inreg %6)
ret void
; CHECK-LABEL: smallStruct_9b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
index 7da6ab1..74d3d85 100644
--- a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
+++ b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
@@ -78,7 +78,7 @@ entry:
%1 = bitcast %struct.SmallStruct_1b1s* %0 to { i32 }*
%2 = getelementptr { i32 }, { i32 }* %1, i32 0, i32 0
%3 = load i32, i32* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 inreg %3)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 inreg %3)
ret void
; CHECK-LABEL: smallStruct_1b1s:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 32
@@ -92,7 +92,7 @@ entry:
%1 = bitcast %struct.SmallStruct_1b1i* %0 to { i64 }*
%2 = getelementptr { i64 }, { i64 }* %1, i32 0, i32 0
%3 = load i64, i64* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
ret void
; CHECK-LABEL: smallStruct_1b1i:
; CHECK-NOT: dsll
@@ -109,7 +109,7 @@ entry:
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
%3 = getelementptr { i48 }, { i48 }* %.coerce, i32 0, i32 0
%4 = load i48, i48* %3, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
ret void
; CHECK-LABEL: smallStruct_1b1s1b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16
@@ -125,7 +125,7 @@ entry:
%1 = bitcast %struct.SmallStruct_1s1i* %0 to { i64 }*
%2 = getelementptr { i64 }, { i64 }* %1, i32 0, i32 0
%3 = load i64, i64* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
ret void
; CHECK-LABEL: smallStruct_1s1i:
; CHECK-NOT: dsll
@@ -142,7 +142,7 @@ entry:
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
%3 = getelementptr { i48 }, { i48 }* %.coerce, i32 0, i32 0
%4 = load i48, i48* %3, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
ret void
; CHECK-LABEL: smallStruct_3b1s:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll
index f70b75f..a4ac5e7 100644
--- a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll
+++ b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll
@@ -146,7 +146,7 @@ entry:
%33 = bitcast %struct.SmallStruct_1b* %8 to { i8 }*
%34 = getelementptr { i8 }, { i8 }* %33, i32 0, i32 0
%35 = load i8, i8* %34, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8 inreg %11, i8 inreg %14, i8 inreg %17, i8 inreg %20, i8 inreg %23, i8 inreg %26, i8 inreg %29, i8 inreg %32, i8 inreg %35)
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8 inreg %11, i8 inreg %14, i8 inreg %17, i8 inreg %20, i8 inreg %23, i8 inreg %26, i8 inreg %29, i8 inreg %32, i8 inreg %35)
ret void
; CHECK-LABEL: smallStruct_1b_x9:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
diff --git a/test/CodeGen/Mips/cfi_offset.ll b/test/CodeGen/Mips/cfi_offset.ll
index 6e78344..9723332 100644
--- a/test/CodeGen/Mips/cfi_offset.ll
+++ b/test/CodeGen/Mips/cfi_offset.ll
@@ -34,7 +34,7 @@ define void @bar() {
%val1 = load volatile double, double* @var
%val2 = load volatile double, double* @var
- call void (...)* @foo() nounwind
+ call void (...) @foo() nounwind
store volatile double %val1, double* @var
store volatile double %val2, double* @var
ret void
diff --git a/test/CodeGen/Mips/dagcombine_crash.ll b/test/CodeGen/Mips/dagcombine_crash.ll
new file mode 100644
index 0000000..6fcf2b4
--- /dev/null
+++ b/test/CodeGen/Mips/dagcombine_crash.ll
@@ -0,0 +1,25 @@
+; RUN: llc -o - %s | FileCheck %s
+; The selection DAG select(select()) normalisation crashed for different types
+; on the condition inputs.
+target datalayout = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
+target triple = "mips--"
+
+; CHECK-LABEL: foobar
+; CHECK: sltiu ${{[0-9]*}}, ${{[0-9]*}}, 42
+; CHECK: sltiu ${{[0-9]*}}, ${{[0-9]*}}, 23
+; CHECK: and ${{[0-9]*}}, ${{[0-9]*}}, ${{[0-9]*}}
+; CHECK: sltu ${{[0-9]*}}, ${{[0-9]*}}, ${{[0-9]*}}
+; CHECK: addiu ${{[0-9]*}}, ${{[0-9]*}}, -1
+; CHECK: movn ${{[0-9]*}}, ${{[0-9]*}}, ${{[0-9]*}}
+; CHECK: jr $ra
+; CHECK: move ${{[0-9]*}}, ${{[0-9]*}}
+define i64 @foobar(i32 %arg) #0 {
+entry:
+ %cmp0 = icmp ult i32 %arg, 23
+ %cmp1 = icmp ult i32 %arg, 42
+ %and = and i1 %cmp0, %cmp1
+ %cmp2 = icmp ugt i32 %arg, 0
+ %sext = sext i1 %cmp1 to i64
+ %retval.0 = select i1 %and, i64 %sext, i64 0
+ ret i64 %retval.0
+}
diff --git a/test/CodeGen/Mips/eh-return32.ll b/test/CodeGen/Mips/eh-return32.ll
index 748050c..542c5bf 100644
--- a/test/CodeGen/Mips/eh-return32.ll
+++ b/test/CodeGen/Mips/eh-return32.ll
@@ -7,7 +7,7 @@ declare void @foo(...)
define i8* @f1(i32 %offset, i8* %handler) {
entry:
- call void (...)* @foo()
+ call void (...) @foo()
call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
unreachable
diff --git a/test/CodeGen/Mips/eh-return64.ll b/test/CodeGen/Mips/eh-return64.ll
index 74a4323..2f8203d 100644
--- a/test/CodeGen/Mips/eh-return64.ll
+++ b/test/CodeGen/Mips/eh-return64.ll
@@ -8,7 +8,7 @@ declare void @foo(...)
define void @f1(i64 %offset, i8* %handler) {
entry:
- call void (...)* @foo()
+ call void (...) @foo()
call void @llvm.eh.return.i64(i64 %offset, i8* %handler)
unreachable
diff --git a/test/CodeGen/Mips/fpbr.ll b/test/CodeGen/Mips/fpbr.ll
index 311b830..27d7094 100644
--- a/test/CodeGen/Mips/fpbr.ll
+++ b/test/CodeGen/Mips/fpbr.ll
@@ -24,11 +24,11 @@ entry:
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- tail call void (...)* @g0() nounwind
+ tail call void (...) @g0() nounwind
br label %if.end
if.else: ; preds = %entry
- tail call void (...)* @g1() nounwind
+ tail call void (...) @g1() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
@@ -57,11 +57,11 @@ entry:
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- tail call void (...)* @g0() nounwind
+ tail call void (...) @g0() nounwind
br label %if.end
if.else: ; preds = %entry
- tail call void (...)* @g1() nounwind
+ tail call void (...) @g1() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
@@ -86,11 +86,11 @@ entry:
br i1 %cmp, label %if.else, label %if.then
if.then: ; preds = %entry
- tail call void (...)* @g0() nounwind
+ tail call void (...) @g0() nounwind
br label %if.end
if.else: ; preds = %entry
- tail call void (...)* @g1() nounwind
+ tail call void (...) @g1() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
@@ -116,11 +116,11 @@ entry:
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- tail call void (...)* @g0() nounwind
+ tail call void (...) @g0() nounwind
br label %if.end
if.else: ; preds = %entry
- tail call void (...)* @g1() nounwind
+ tail call void (...) @g1() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
@@ -145,11 +145,11 @@ entry:
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- tail call void (...)* @g0() nounwind
+ tail call void (...) @g0() nounwind
br label %if.end
if.else: ; preds = %entry
- tail call void (...)* @g1() nounwind
+ tail call void (...) @g1() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
@@ -174,11 +174,11 @@ entry:
br i1 %cmp, label %if.else, label %if.then
if.then: ; preds = %entry
- tail call void (...)* @g0() nounwind
+ tail call void (...) @g0() nounwind
br label %if.end
if.else: ; preds = %entry
- tail call void (...)* @g1() nounwind
+ tail call void (...) @g1() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
diff --git a/test/CodeGen/Mips/gprestore.ll b/test/CodeGen/Mips/gprestore.ll
index 0b005ab..b1c2ad1 100644
--- a/test/CodeGen/Mips/gprestore.ll
+++ b/test/CodeGen/Mips/gprestore.ll
@@ -17,7 +17,7 @@ entry:
; CHECK: jalr
; CHECK-NOT: got({{.*}})($gp)
; CHECK: lw $gp
- tail call void (...)* @f1() nounwind
+ tail call void (...) @f1() nounwind
%tmp = load i32, i32* @p, align 4
tail call void @f2(i32 %tmp) nounwind
%tmp1 = load i32, i32* @q, align 4
diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll
index 768abc2..a0dbdf3 100644
--- a/test/CodeGen/Mips/helloworld.ll
+++ b/test/CodeGen/Mips/helloworld.ll
@@ -12,7 +12,7 @@
define i32 @main() nounwind {
entry:
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0))
ret i32 0
; SR: .set mips16
diff --git a/test/CodeGen/Mips/hf16call32.ll b/test/CodeGen/Mips/hf16call32.ll
index 59cf413..3b3f8f7 100644
--- a/test/CodeGen/Mips/hf16call32.ll
+++ b/test/CodeGen/Mips/hf16call32.ll
@@ -77,7 +77,7 @@ entry:
%4 = load float, float* @lx, align 4
%cmp = fcmp oeq float %3, %4
%conv2 = zext i1 %cmp to i32
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), double %conv, double %conv1, i32 %conv2)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), double %conv, double %conv1, i32 %conv2)
call void @clear()
store double 0x41678C29C0000000, double* @lxd, align 8
%5 = load double, double* @lxd, align 8
@@ -88,7 +88,7 @@ entry:
%9 = load double, double* @lxd, align 8
%cmp3 = fcmp oeq double %8, %9
%conv4 = zext i1 %cmp3 to i32
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), double %6, double %7, i32 %conv4)
+ %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), double %6, double %7, i32 %conv4)
call void @clear()
store float 9.000000e+00, float* @lx, align 4
store float 1.000000e+01, float* @ly, align 4
@@ -117,7 +117,7 @@ land.rhs: ; preds = %entry
land.end: ; preds = %land.rhs, %entry
%20 = phi i1 [ false, %entry ], [ %cmp12, %land.rhs ]
%land.ext = zext i1 %20 to i32
- %call14 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %conv6, double %conv7, double %conv8, double %conv9, i32 %land.ext)
+ %call14 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %conv6, double %conv7, double %conv8, double %conv9, i32 %land.ext)
call void @clear()
store float 0x3FFE666660000000, float* @lx, align 4
store double 0x4007E613249FF279, double* @lyd, align 8
@@ -139,7 +139,7 @@ land.end: ; preds = %land.rhs, %entry
%cmp19 = fcmp oeq double %29, %30
%conv20 = zext i1 %cmp19 to i32
%and = and i32 %conv18, %conv20
- %call21 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %conv15, double %conv16, double %25, double %26, i32 %and)
+ %call21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %conv15, double %conv16, double %25, double %26, i32 %and)
call void @clear()
store double 0x4194E54F94000000, double* @lxd, align 8
store float 7.600000e+01, float* @ly, align 4
@@ -161,7 +161,7 @@ land.end: ; preds = %land.rhs, %entry
%cmp26 = fcmp oeq float %39, %40
%conv27 = zext i1 %cmp26 to i32
%and28 = and i32 %conv25, %conv27
- %call29 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %33, double %34, double %conv22, double %conv23, i32 %and28)
+ %call29 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %33, double %34, double %conv22, double %conv23, i32 %and28)
call void @clear()
store double 7.365198e+07, double* @lxd, align 8
store double 0x416536CD80000000, double* @lyd, align 8
@@ -181,7 +181,7 @@ land.end: ; preds = %land.rhs, %entry
%cmp32 = fcmp oeq double %49, %50
%conv33 = zext i1 %cmp32 to i32
%and34 = and i32 %conv31, %conv33
- %call35 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %43, double %44, double %45, double %46, i32 %and34)
+ %call35 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %43, double %44, double %45, double %46, i32 %and34)
call void @clear()
store float 0x4016666660000000, float* @ret_sf, align 4
%call36 = call float @sf_v()
@@ -194,7 +194,7 @@ land.end: ; preds = %land.rhs, %entry
%54 = load float, float* @lret_sf, align 4
%cmp39 = fcmp oeq float %53, %54
%conv40 = zext i1 %cmp39 to i32
- %call41 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), double %conv37, double %conv38, i32 %conv40)
+ %call41 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), double %conv37, double %conv38, i32 %conv40)
call void @clear()
store float 4.587300e+06, float* @ret_sf, align 4
store float 3.420000e+02, float* @lx, align 4
@@ -218,7 +218,7 @@ land.end: ; preds = %land.rhs, %entry
%cmp49 = fcmp oeq float %62, %63
%conv50 = zext i1 %cmp49 to i32
%and51 = and i32 %conv48, %conv50
- %call52 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %conv43, double %conv44, double %conv45, double %conv46, i32 %and51)
+ %call52 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %conv43, double %conv44, double %conv45, double %conv46, i32 %and51)
call void @clear()
store float 4.445910e+06, float* @ret_sf, align 4
store double 0x419A7DB294000000, double* @lxd, align 8
@@ -240,7 +240,7 @@ land.end: ; preds = %land.rhs, %entry
%cmp58 = fcmp oeq double %71, %72
%conv59 = zext i1 %cmp58 to i32
%and60 = and i32 %conv57, %conv59
- %call61 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %conv54, double %conv55, double %67, double %68, i32 %and60)
+ %call61 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %conv54, double %conv55, double %67, double %68, i32 %and60)
call void @clear()
store float 0x3FFF4BC6A0000000, float* @ret_sf, align 4
store float 4.445500e+03, float* @lx, align 4
@@ -281,7 +281,7 @@ land.rhs73: ; preds = %land.lhs.true
land.end76: ; preds = %land.rhs73, %land.lhs.true, %land.end
%87 = phi i1 [ false, %land.lhs.true ], [ false, %land.end ], [ %cmp74, %land.rhs73 ]
%land.ext77 = zext i1 %87 to i32
- %call78 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %conv63, double %conv64, double %conv65, double %conv66, double %conv67, double %conv68, i32 %land.ext77)
+ %call78 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %conv63, double %conv64, double %conv65, double %conv66, double %conv67, double %conv68, i32 %land.ext77)
call void @clear()
store float 9.991300e+04, float* @ret_sf, align 4
store float 1.114500e+04, float* @lx, align 4
@@ -320,7 +320,7 @@ land.rhs89: ; preds = %land.lhs.true86
land.end92: ; preds = %land.rhs89, %land.lhs.true86, %land.end76
%102 = phi i1 [ false, %land.lhs.true86 ], [ false, %land.end76 ], [ %cmp90, %land.rhs89 ]
%land.ext93 = zext i1 %102 to i32
- %call94 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %conv80, double %conv81, double %conv82, double %conv83, double %94, double %95, i32 %land.ext93)
+ %call94 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %conv80, double %conv81, double %conv82, double %conv83, double %94, double %95, i32 %land.ext93)
call void @clear()
store float 0x417CCC7A00000000, float* @ret_sf, align 4
store double 0x4172034530000000, double* @lxd, align 8
@@ -359,7 +359,7 @@ land.rhs105: ; preds = %land.lhs.true102
land.end108: ; preds = %land.rhs105, %land.lhs.true102, %land.end92
%117 = phi i1 [ false, %land.lhs.true102 ], [ false, %land.end92 ], [ %cmp106, %land.rhs105 ]
%land.ext109 = zext i1 %117 to i32
- %call110 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %conv96, double %conv97, double %107, double %108, double %conv98, double %conv99, i32 %land.ext109)
+ %call110 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %conv96, double %conv97, double %107, double %108, double %conv98, double %conv99, i32 %land.ext109)
call void @clear()
store float 3.987721e+06, float* @ret_sf, align 4
store double 0x3FF1F49F6DDDC2D8, double* @lxd, align 8
@@ -396,7 +396,7 @@ land.rhs119: ; preds = %land.lhs.true116
land.end122: ; preds = %land.rhs119, %land.lhs.true116, %land.end108
%132 = phi i1 [ false, %land.lhs.true116 ], [ false, %land.end108 ], [ %cmp120, %land.rhs119 ]
%land.ext123 = zext i1 %132 to i32
- %call124 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %conv112, double %conv113, double %122, double %123, double %124, double %125, i32 %land.ext123)
+ %call124 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %conv112, double %conv113, double %122, double %123, double %124, double %125, i32 %land.ext123)
call void @clear()
store double 1.561234e+01, double* @ret_df, align 8
%call125 = call double @df_v()
@@ -407,7 +407,7 @@ land.end122: ; preds = %land.rhs119, %land.
%136 = load double, double* @lret_df, align 8
%cmp126 = fcmp oeq double %135, %136
%conv127 = zext i1 %cmp126 to i32
- %call128 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), double %133, double %134, i32 %conv127)
+ %call128 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), double %133, double %134, i32 %conv127)
call void @clear()
store double 1.345873e+01, double* @ret_df, align 8
store float 3.434520e+05, float* @lx, align 4
@@ -429,7 +429,7 @@ land.end122: ; preds = %land.rhs119, %land.
%cmp134 = fcmp oeq float %144, %145
%conv135 = zext i1 %cmp134 to i32
%and136 = and i32 %conv133, %conv135
- %call137 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %138, double %139, double %conv130, double %conv131, i32 %and136)
+ %call137 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %138, double %139, double %conv130, double %conv131, i32 %and136)
call void @clear()
store double 0x4084F3AB7AA25D8D, double* @ret_df, align 8
store double 0x4114F671D2F1A9FC, double* @lxd, align 8
@@ -449,7 +449,7 @@ land.end122: ; preds = %land.rhs119, %land.
%cmp141 = fcmp oeq double %153, %154
%conv142 = zext i1 %cmp141 to i32
%and143 = and i32 %conv140, %conv142
- %call144 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %147, double %148, double %149, double %150, i32 %and143)
+ %call144 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %147, double %148, double %149, double %150, i32 %and143)
call void @clear()
store double 6.781956e+03, double* @ret_df, align 8
store float 4.445500e+03, float* @lx, align 4
@@ -488,7 +488,7 @@ land.rhs155: ; preds = %land.lhs.true152
land.end158: ; preds = %land.rhs155, %land.lhs.true152, %land.end122
%169 = phi i1 [ false, %land.lhs.true152 ], [ false, %land.end122 ], [ %cmp156, %land.rhs155 ]
%land.ext159 = zext i1 %169 to i32
- %call160 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %157, double %158, double %conv146, double %conv147, double %conv148, double %conv149, i32 %land.ext159)
+ %call160 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %157, double %158, double %conv146, double %conv147, double %conv148, double %conv149, i32 %land.ext159)
call void @clear()
store double 1.889130e+05, double* @ret_df, align 8
store float 9.111450e+05, float* @lx, align 4
@@ -525,7 +525,7 @@ land.rhs169: ; preds = %land.lhs.true166
land.end172: ; preds = %land.rhs169, %land.lhs.true166, %land.end158
%184 = phi i1 [ false, %land.lhs.true166 ], [ false, %land.end158 ], [ %cmp170, %land.rhs169 ]
%land.ext173 = zext i1 %184 to i32
- %call174 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %172, double %173, double %conv162, double %conv163, double %176, double %177, i32 %land.ext173)
+ %call174 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %172, double %173, double %conv162, double %conv163, double %176, double %177, i32 %land.ext173)
call void @clear()
store double 0x418B2DB900000000, double* @ret_df, align 8
store double 0x41B1EF2ED3000000, double* @lxd, align 8
@@ -562,7 +562,7 @@ land.rhs183: ; preds = %land.lhs.true180
land.end186: ; preds = %land.rhs183, %land.lhs.true180, %land.end172
%199 = phi i1 [ false, %land.lhs.true180 ], [ false, %land.end172 ], [ %cmp184, %land.rhs183 ]
%land.ext187 = zext i1 %199 to i32
- %call188 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %187, double %188, double %189, double %190, double %conv176, double %conv177, i32 %land.ext187)
+ %call188 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %187, double %188, double %189, double %190, double %conv176, double %conv177, i32 %land.ext187)
call void @clear()
store double 3.987721e+06, double* @ret_df, align 8
store double 5.223560e+00, double* @lxd, align 8
@@ -597,7 +597,7 @@ land.rhs195: ; preds = %land.lhs.true192
land.end198: ; preds = %land.rhs195, %land.lhs.true192, %land.end186
%214 = phi i1 [ false, %land.lhs.true192 ], [ false, %land.end186 ], [ %cmp196, %land.rhs195 ]
%land.ext199 = zext i1 %214 to i32
- %call200 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %202, double %203, double %204, double %205, double %206, double %207, i32 %land.ext199)
+ %call200 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %202, double %203, double %204, double %205, double %206, double %207, i32 %land.ext199)
call void @clear()
store float 4.500000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 0)
store float 7.000000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 1)
@@ -630,7 +630,7 @@ land.end198: ; preds = %land.rhs195, %land.
%cmp.i = fcmp oeq float %ret_sc.imag215, %lret_sc.imag217
%and.ri = and i1 %cmp.r, %cmp.i
%conv218 = zext i1 %and.ri to i32
- %call219 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str3, i32 0, i32 0), double %conv202, double %conv207, double %conv208, double %conv213, i32 %conv218)
+ %call219 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str3, i32 0, i32 0), double %conv202, double %conv207, double %conv208, double %conv213, i32 %conv218)
call void @clear()
store float 0x3FF7A99300000000, float* @lx, align 4
store float 4.500000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 0)
@@ -679,7 +679,7 @@ land.rhs247: ; preds = %land.end198
land.end250: ; preds = %land.rhs247, %land.end198
%224 = phi i1 [ false, %land.end198 ], [ %cmp248, %land.rhs247 ]
%land.ext251 = zext i1 %224 to i32
- %call252 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str4, i32 0, i32 0), double %conv223, double %conv228, double %conv231, double %conv236, double %conv237, double %conv238, i32 %land.ext251)
+ %call252 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str4, i32 0, i32 0), double %conv223, double %conv228, double %conv231, double %conv236, double %conv237, double %conv238, i32 %land.ext251)
call void @clear()
store double 1.234500e+03, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 0)
store double 7.677000e+03, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 1)
@@ -704,7 +704,7 @@ land.end250: ; preds = %land.rhs247, %land.
%cmp.i263 = fcmp oeq double %ret_dc.imag259, %lret_dc.imag261
%and.ri264 = and i1 %cmp.r262, %cmp.i263
%conv265 = zext i1 %and.ri264 to i32
- %call266 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str3, i32 0, i32 0), double %ret_dc.real, double %ret_dc.imag255, double %lret_dc.real, double %lret_dc.imag257, i32 %conv265)
+ %call266 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str3, i32 0, i32 0), double %ret_dc.real, double %ret_dc.imag255, double %lret_dc.real, double %lret_dc.imag257, i32 %conv265)
call void @clear()
store double 0x40AAF6F532617C1C, double* @lxd, align 8
store double 4.444500e+03, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 0)
@@ -745,7 +745,7 @@ land.rhs286: ; preds = %land.end250
land.end289: ; preds = %land.rhs286, %land.end250
%234 = phi i1 [ false, %land.end250 ], [ %cmp287, %land.rhs286 ]
%land.ext290 = zext i1 %234 to i32
- %call291 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str4, i32 0, i32 0), double %ret_dc.real268, double %ret_dc.imag271, double %lret_dc.real272, double %lret_dc.imag275, double %conv276, double %conv277, i32 %land.ext290)
+ %call291 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str4, i32 0, i32 0), double %ret_dc.real268, double %ret_dc.imag271, double %lret_dc.real272, double %lret_dc.imag275, double %conv276, double %conv277, i32 %land.ext290)
%235 = load i32, i32* %retval
ret i32 %235
}
diff --git a/test/CodeGen/Mips/hfptrcall.ll b/test/CodeGen/Mips/hfptrcall.ll
index de809f1..c9f1fe9 100644
--- a/test/CodeGen/Mips/hfptrcall.ll
+++ b/test/CodeGen/Mips/hfptrcall.ll
@@ -70,12 +70,12 @@ entry:
store float %call, float* @x, align 4
%1 = load float, float* @x, align 4
%conv = fpext float %1 to double
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double %conv)
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double %conv)
%2 = load double ()*, double ()** @ptrdv, align 4
%call2 = call double %2()
store double %call2, double* @xd, align 8
%3 = load double, double* @xd, align 8
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double %3)
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double %3)
%4 = load { float, float } ()*, { float, float } ()** @ptrscv, align 4
%call4 = call { float, float } %4()
%5 = extractvalue { float, float } %call4, 0
@@ -90,7 +90,7 @@ entry:
%xy.imag8 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @xy, i32 0, i32 1)
%conv9 = fpext float %xy.real7 to double
%conv10 = fpext float %xy.imag8 to double
- %call11 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str1, i32 0, i32 0), double %conv5, double %conv10)
+ %call11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str1, i32 0, i32 0), double %conv5, double %conv10)
%7 = load { double, double } ()*, { double, double } ()** @ptrdcv, align 4
%call12 = call { double, double } %7()
%8 = extractvalue { double, double } %call12, 0
@@ -101,7 +101,7 @@ entry:
%xyd.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @xyd, i32 0, i32 1)
%xyd.real13 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @xyd, i32 0, i32 0)
%xyd.imag14 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @xyd, i32 0, i32 1)
- %call15 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str1, i32 0, i32 0), double %xyd.real, double %xyd.imag14)
+ %call15 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str1, i32 0, i32 0), double %xyd.real, double %xyd.imag14)
ret i32 0
}
diff --git a/test/CodeGen/Mips/i32k.ll b/test/CodeGen/Mips/i32k.ll
index 5c5761f..ba9cf73 100644
--- a/test/CodeGen/Mips/i32k.ll
+++ b/test/CodeGen/Mips/i32k.ll
@@ -4,14 +4,14 @@
define i32 @main() nounwind {
entry:
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 1075344593) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 1075344593) nounwind
; 16: lw ${{[0-9]+}}, 1f
; 16: b 2f
; 16: .align 2
; 16: 1: .word 1075344593
; 16: 2:
- %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 -1075344593) nounwind
+ %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 -1075344593) nounwind
; 16: lw ${{[0-9]+}}, 1f
; 16: b 2f
diff --git a/test/CodeGen/Mips/inlineasm_constraint.ll b/test/CodeGen/Mips/inlineasm_constraint.ll
index 76b73dc..868433e 100644
--- a/test/CodeGen/Mips/inlineasm_constraint.ll
+++ b/test/CodeGen/Mips/inlineasm_constraint.ll
@@ -51,14 +51,5 @@ entry:
; CHECK: #NO_APP
tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind
-; Now R Which takes the address of c
- %c = alloca i32, align 4
- store i32 -4469539, i32* %c, align 4
- %8 = call i32 asm sideeffect "lw $0, 1 + $1\0A\09lw $0, 2 + $1\0A\09", "=r,*R"(i32* %c) #1
-; CHECK: #APP
-; CHECK: lw ${{[0-9]+}}, 1 + 0(${{[0-9]+}})
-; CHECK: lw ${{[0-9]+}}, 2 + 0(${{[0-9]+}})
-; CHECK: #NO_APP
-
ret i32 0
}
diff --git a/test/CodeGen/Mips/inlineasm_constraint_R.ll b/test/CodeGen/Mips/inlineasm_constraint_R.ll
new file mode 100644
index 0000000..c4105ae
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm_constraint_R.ll
@@ -0,0 +1,60 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+@data = global [8193 x i32] zeroinitializer
+
+define void @R(i32 *%p) nounwind {
+entry:
+ ; CHECK-LABEL: R:
+
+ call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0))
+
+ ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; CHECK: #APP
+ ; CHECK: lw $1, 0($[[BASEPTR]])
+ ; CHECK: #NO_APP
+
+ ret void
+}
+
+define void @R_offset_4(i32 *%p) nounwind {
+entry:
+ ; CHECK-LABEL: R_offset_4:
+
+ call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1))
+
+ ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; CHECK: #APP
+ ; CHECK: lw $1, 4($[[BASEPTR]])
+ ; CHECK: #NO_APP
+
+ ret void
+}
+
+define void @R_offset_254(i32 *%p) nounwind {
+entry:
+ ; CHECK-LABEL: R_offset_254:
+
+ call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 63))
+
+ ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; CHECK: #APP
+ ; CHECK: lw $1, 252($[[BASEPTR]])
+ ; CHECK: #NO_APP
+
+ ret void
+}
+
+define void @R_offset_256(i32 *%p) nounwind {
+entry:
+ ; CHECK-LABEL: R_offset_256:
+
+ call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 64))
+
+ ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; CHECK: addiu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], 256
+ ; CHECK: #APP
+ ; CHECK: lw $1, 0($[[BASEPTR2]])
+ ; CHECK: #NO_APP
+
+ ret void
+}
diff --git a/test/CodeGen/Mips/internalfunc.ll b/test/CodeGen/Mips/internalfunc.ll
index bde7357..2b4a039 100644
--- a/test/CodeGen/Mips/internalfunc.ll
+++ b/test/CodeGen/Mips/internalfunc.ll
@@ -21,7 +21,7 @@ entry:
if.then: ; preds = %entry
%tmp1 = load void (...)*, void (...)** @caller.sf1, align 4
- tail call void (...)* %tmp1() nounwind
+ tail call void (...) %tmp1() nounwind
br label %if.end
if.end: ; preds = %entry, %if.then
@@ -38,7 +38,7 @@ if.end: ; preds = %entry, %if.then
define internal void @sf2() nounwind {
entry:
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0)) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0)) nounwind
ret void
}
@@ -46,7 +46,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
define internal fastcc void @f2() nounwind noinline {
entry:
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0)) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0)) nounwind
ret void
}
diff --git a/test/CodeGen/Mips/lb1.ll b/test/CodeGen/Mips/lb1.ll
index ad94c5f..21648d7 100644
--- a/test/CodeGen/Mips/lb1.ll
+++ b/test/CodeGen/Mips/lb1.ll
@@ -11,7 +11,7 @@ entry:
%conv = sext i8 %0 to i32
store i32 %conv, i32* %i, align 4
%1 = load i32, i32* %i, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/lbu1.ll b/test/CodeGen/Mips/lbu1.ll
index a8ef2ff..28ca271 100644
--- a/test/CodeGen/Mips/lbu1.ll
+++ b/test/CodeGen/Mips/lbu1.ll
@@ -12,7 +12,7 @@ entry:
store i32 %conv, i32* %i, align 4
%1 = load i8, i8* @c, align 1
%conv1 = zext i8 %1 to i32
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %conv1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %conv1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/lh1.ll b/test/CodeGen/Mips/lh1.ll
index 3b245b1..31967e5 100644
--- a/test/CodeGen/Mips/lh1.ll
+++ b/test/CodeGen/Mips/lh1.ll
@@ -11,7 +11,7 @@ entry:
; 16: lh ${{[0-9]+}}, 0(${{[0-9]+}})
store i32 %conv, i32* %i, align 4
%1 = load i32, i32* %i, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/lhu1.ll b/test/CodeGen/Mips/lhu1.ll
index 02abfb7..413da46 100644
--- a/test/CodeGen/Mips/lhu1.ll
+++ b/test/CodeGen/Mips/lhu1.ll
@@ -12,7 +12,7 @@ entry:
; 16: lhu ${{[0-9]+}}, 0(${{[0-9]+}})
store i32 %conv, i32* %i, align 4
%1 = load i32, i32* %i, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/mbrsize4a.ll b/test/CodeGen/Mips/mbrsize4a.ll
index e7ca776..a99facc 100644
--- a/test/CodeGen/Mips/mbrsize4a.ll
+++ b/test/CodeGen/Mips/mbrsize4a.ll
@@ -17,7 +17,7 @@ z: ; preds = %y, %entry
br label %y
y: ; preds = %z
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0))
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0))
br label %z
return: ; No predecessors!
diff --git a/test/CodeGen/Mips/micromips-addiu.ll b/test/CodeGen/Mips/micromips-addiu.ll
index 3035782..e0743c9 100644
--- a/test/CodeGen/Mips/micromips-addiu.ll
+++ b/test/CodeGen/Mips/micromips-addiu.ll
@@ -10,17 +10,17 @@ define i32 @main() nounwind {
entry:
%0 = load i32, i32* @x, align 4
%addiu1 = add i32 %0, -7
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds
([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %addiu1)
%1 = load i32, i32* @y, align 4
%addiu2 = add i32 %1, 55
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds
([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %addiu2)
%2 = load i32, i32* @z, align 4
%addiu3 = add i32 %2, 24
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds
([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %addiu3)
ret i32 0
}
diff --git a/test/CodeGen/Mips/micromips-andi.ll b/test/CodeGen/Mips/micromips-andi.ll
index cec30e2..cd7a794 100644
--- a/test/CodeGen/Mips/micromips-andi.ll
+++ b/test/CodeGen/Mips/micromips-andi.ll
@@ -9,12 +9,12 @@ define i32 @main() nounwind {
entry:
%0 = load i32, i32* @x, align 4
%and1 = and i32 %0, 4
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds
([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %and1)
%1 = load i32, i32* @y, align 4
%and2 = and i32 %1, 5
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds
([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %and2)
ret i32 0
}
diff --git a/test/CodeGen/Mips/mips16_32_8.ll b/test/CodeGen/Mips/mips16_32_8.ll
index e79cda5..5e03928 100644
--- a/test/CodeGen/Mips/mips16_32_8.ll
+++ b/test/CodeGen/Mips/mips16_32_8.ll
@@ -28,7 +28,7 @@ entry:
store float %add, float* @f, align 4
%2 = load float, float* @f, align 4
%conv = fpext float %2 to double
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), double %conv)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), double %conv)
ret void
}
@@ -49,10 +49,10 @@ define i32 @main() #3 {
entry:
call void @foo()
%0 = load i32, i32* @i, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str1, i32 0, i32 0), i32 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str1, i32 0, i32 0), i32 %0)
call void @nofoo()
%1 = load i32, i32* @i, align 4
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str2, i32 0, i32 0), i32 %1)
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str2, i32 0, i32 0), i32 %1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/mips16ex.ll b/test/CodeGen/Mips/mips16ex.ll
index 25957fb..0f4dc7e 100644
--- a/test/CodeGen/Mips/mips16ex.ll
+++ b/test/CodeGen/Mips/mips16ex.ll
@@ -16,7 +16,7 @@ entry:
%ehselector.slot = alloca i32
%e = alloca i32, align 4
store i32 0, i32* %retval
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0))
%exception = call i8* @__cxa_allocate_exception(i32 4) nounwind
%0 = bitcast i8* %exception to i32*
store i32 20, i32* %0
diff --git a/test/CodeGen/Mips/neg1.ll b/test/CodeGen/Mips/neg1.ll
index e2b10e0..36275a2 100644
--- a/test/CodeGen/Mips/neg1.ll
+++ b/test/CodeGen/Mips/neg1.ll
@@ -8,7 +8,7 @@ entry:
%0 = load i32, i32* @i, align 4
%sub = sub nsw i32 0, %0
; 16: neg ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %sub)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %sub)
ret i32 0
}
diff --git a/test/CodeGen/Mips/not1.ll b/test/CodeGen/Mips/not1.ll
index bf5d06e..f5ec5b6 100644
--- a/test/CodeGen/Mips/not1.ll
+++ b/test/CodeGen/Mips/not1.ll
@@ -9,7 +9,7 @@ entry:
%0 = load i32, i32* @x, align 4
%neg = xor i32 %0, -1
; 16: not ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %neg)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %neg)
ret i32 0
}
diff --git a/test/CodeGen/Mips/octeon.ll b/test/CodeGen/Mips/octeon.ll
index 97e12e7..499ce3c 100644
--- a/test/CodeGen/Mips/octeon.ll
+++ b/test/CodeGen/Mips/octeon.ll
@@ -93,7 +93,7 @@ entry:
; ALL-LABEL: bbit0:
; OCTEON: bbit0 $4, 3, $[[BB0:BB[0-9_]+]]
; MIPS64: andi $[[T0:[0-9]+]], $4, 8
-; MIPS64: beqz $[[T0]], $[[BB0:BB[0-9_]+]]
+; MIPS64: bnez $[[T0]], $[[BB0:BB[0-9_]+]]
%bit = and i64 %a, 8
%res = icmp eq i64 %bit, 0
br i1 %res, label %endif, label %if
@@ -111,7 +111,7 @@ entry:
; MIPS64: daddiu $[[T0:[0-9]+]], $zero, 1
; MIPS64: dsll $[[T1:[0-9]+]], $[[T0]], 35
; MIPS64: and $[[T2:[0-9]+]], $4, $[[T1]]
-; MIPS64: beqz $[[T2]], $[[BB0:BB[0-9_]+]]
+; MIPS64: bnez $[[T2]], $[[BB0:BB[0-9_]+]]
%bit = and i64 %a, 34359738368
%res = icmp eq i64 %bit, 0
br i1 %res, label %endif, label %if
diff --git a/test/CodeGen/Mips/or1.ll b/test/CodeGen/Mips/or1.ll
index 66dd070..51b6ebf 100644
--- a/test/CodeGen/Mips/or1.ll
+++ b/test/CodeGen/Mips/or1.ll
@@ -10,7 +10,7 @@ entry:
%1 = load i32, i32* @y, align 4
%or = or i32 %0, %1
; 16: or ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %or)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %or)
ret i32 0
}
diff --git a/test/CodeGen/Mips/return-vector.ll b/test/CodeGen/Mips/return-vector.ll
index 0e0d515..3870fe0 100644
--- a/test/CodeGen/Mips/return-vector.ll
+++ b/test/CodeGen/Mips/return-vector.ll
@@ -12,7 +12,7 @@ declare <4 x double> @d4(...)
define i32 @call_i8() {
entry:
- %call = call <8 x i32> (...)* @i8()
+ %call = call <8 x i32> (...) @i8()
%v0 = extractelement <8 x i32> %call, i32 0
%v1 = extractelement <8 x i32> %call, i32 1
%v2 = extractelement <8 x i32> %call, i32 2
@@ -46,7 +46,7 @@ entry:
define float @call_f4() {
entry:
- %call = call <4 x float> (...)* @f4()
+ %call = call <4 x float> (...) @f4()
%v0 = extractelement <4 x float> %call, i32 0
%v1 = extractelement <4 x float> %call, i32 1
%v2 = extractelement <4 x float> %call, i32 2
@@ -68,7 +68,7 @@ entry:
define double @call_d4() {
entry:
- %call = call <4 x double> (...)* @d4()
+ %call = call <4 x double> (...) @d4()
%v0 = extractelement <4 x double> %call, i32 0
%v1 = extractelement <4 x double> %call, i32 1
%v2 = extractelement <4 x double> %call, i32 2
@@ -99,7 +99,7 @@ declare <2 x double> @d2(...)
define i32 @call_i4() {
entry:
- %call = call <4 x i32> (...)* @i4()
+ %call = call <4 x i32> (...) @i4()
%v0 = extractelement <4 x i32> %call, i32 0
%v1 = extractelement <4 x i32> %call, i32 1
%v2 = extractelement <4 x i32> %call, i32 2
@@ -120,7 +120,7 @@ entry:
define float @call_f2() {
entry:
- %call = call <2 x float> (...)* @f2()
+ %call = call <2 x float> (...) @f2()
%v0 = extractelement <2 x float> %call, i32 0
%v1 = extractelement <2 x float> %call, i32 1
%add1 = fadd float %v0, %v1
@@ -135,7 +135,7 @@ entry:
define double @call_d2() {
entry:
- %call = call <2 x double> (...)* @d2()
+ %call = call <2 x double> (...) @d2()
%v0 = extractelement <2 x double> %call, i32 0
%v1 = extractelement <2 x double> %call, i32 1
%add1 = fadd double %v0, %v1
diff --git a/test/CodeGen/Mips/sb1.ll b/test/CodeGen/Mips/sb1.ll
index 4c17a50..d2e8510 100644
--- a/test/CodeGen/Mips/sb1.ll
+++ b/test/CodeGen/Mips/sb1.ll
@@ -13,7 +13,7 @@ entry:
%2 = load i8, i8* @c, align 1
%conv1 = sext i8 %2 to i32
; 16: sb ${{[0-9]+}}, 0(${{[0-9]+}})
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/selnek.ll b/test/CodeGen/Mips/selnek.ll
index e8a5105..5b6aa2a 100644
--- a/test/CodeGen/Mips/selnek.ll
+++ b/test/CodeGen/Mips/selnek.ll
@@ -79,13 +79,13 @@ define i32 @main() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o3
entry:
call void @calc_z() "target-cpu"="mips16" "target-features"="+mips16,+o32"
%0 = load i32, i32* @z1, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %0) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %0) "target-cpu"="mips16" "target-features"="+mips16,+o32"
%1 = load i32, i32* @z2, align 4
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1) "target-cpu"="mips16" "target-features"="+mips16,+o32"
%2 = load i32, i32* @z3, align 4
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %2) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %2) "target-cpu"="mips16" "target-features"="+mips16,+o32"
%3 = load i32, i32* @z4, align 4
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %3) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %3) "target-cpu"="mips16" "target-features"="+mips16,+o32"
ret i32 0
}
diff --git a/test/CodeGen/Mips/sh1.ll b/test/CodeGen/Mips/sh1.ll
index 1ab7779..3f70b9b 100644
--- a/test/CodeGen/Mips/sh1.ll
+++ b/test/CodeGen/Mips/sh1.ll
@@ -13,7 +13,7 @@ entry:
%2 = load i16, i16* @s, align 2
%conv1 = sext i16 %2 to i32
; 16: sh ${{[0-9]+}}, 0(${{[0-9]+}})
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/sll1.ll b/test/CodeGen/Mips/sll1.ll
index 52173b8..4d35b64 100644
--- a/test/CodeGen/Mips/sll1.ll
+++ b/test/CodeGen/Mips/sll1.ll
@@ -12,7 +12,7 @@ entry:
; 16: sll ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
store i32 %shl, i32* @j, align 4
%1 = load i32, i32* @j, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/sll2.ll b/test/CodeGen/Mips/sll2.ll
index 0e7194e..dc2236b 100644
--- a/test/CodeGen/Mips/sll2.ll
+++ b/test/CodeGen/Mips/sll2.ll
@@ -12,7 +12,7 @@ entry:
; 16: sllv ${{[0-9]+}}, ${{[0-9]+}}
store i32 %shl, i32* @i, align 4
%2 = load i32, i32* @j, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %2)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %2)
ret i32 0
}
diff --git a/test/CodeGen/Mips/sra1.ll b/test/CodeGen/Mips/sra1.ll
index ecaba2c..1c7d417 100644
--- a/test/CodeGen/Mips/sra1.ll
+++ b/test/CodeGen/Mips/sra1.ll
@@ -8,7 +8,7 @@ entry:
%0 = load i32, i32* @i, align 4
%shr = ashr i32 %0, 3
; 16: sra ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %shr)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %shr)
ret i32 0
}
diff --git a/test/CodeGen/Mips/sra2.ll b/test/CodeGen/Mips/sra2.ll
index d5fac8d..771d0f4 100644
--- a/test/CodeGen/Mips/sra2.ll
+++ b/test/CodeGen/Mips/sra2.ll
@@ -10,7 +10,7 @@ entry:
%1 = load i32, i32* @j, align 4
%shr = ashr i32 %0, %1
; 16: srav ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %shr)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %shr)
ret i32 0
}
diff --git a/test/CodeGen/Mips/srl1.ll b/test/CodeGen/Mips/srl1.ll
index dc4d88a..a748eab 100644
--- a/test/CodeGen/Mips/srl1.ll
+++ b/test/CodeGen/Mips/srl1.ll
@@ -11,7 +11,7 @@ entry:
; 16: srl ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
store i32 %shr, i32* @j, align 4
%1 = load i32, i32* @j, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/srl2.ll b/test/CodeGen/Mips/srl2.ll
index 8fe088c..6e338b3 100644
--- a/test/CodeGen/Mips/srl2.ll
+++ b/test/CodeGen/Mips/srl2.ll
@@ -13,7 +13,7 @@ entry:
; 16: srlv ${{[0-9]+}}, ${{[0-9]+}}
store i32 %shr, i32* @j, align 4
%2 = load i32, i32* @j, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %2)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %2)
ret i32 0
}
diff --git a/test/CodeGen/Mips/stchar.ll b/test/CodeGen/Mips/stchar.ll
index ad58794..6bc4889 100644
--- a/test/CodeGen/Mips/stchar.ll
+++ b/test/CodeGen/Mips/stchar.ll
@@ -9,7 +9,7 @@ define void @p1(i16 signext %s, i8 signext %c) nounwind {
entry:
%conv = sext i16 %s to i32
%conv1 = sext i8 %c to i32
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %conv, i32 %conv1) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %conv, i32 %conv1) nounwind
ret void
}
@@ -23,7 +23,7 @@ entry:
%3 = load i8, i8* %2, align 1
%conv.i = sext i16 %1 to i32
%conv1.i = sext i8 %3 to i32
- %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
+ %call.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
%4 = load i16*, i16** @sp, align 4
store i16 32, i16* %4, align 2
%5 = load i8*, i8** @cp, align 4
@@ -39,7 +39,7 @@ entry:
store i8 99, i8* %c, align 4
store i16* %s, i16** @sp, align 4
store i8* %c, i8** @cp, align 4
- %call.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind
+ %call.i.i = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind
%0 = load i16*, i16** @sp, align 4
store i16 32, i16* %0, align 2
%1 = load i8*, i8** @cp, align 4
@@ -48,7 +48,7 @@ entry:
%3 = load i8, i8* %c, align 4
%conv.i = sext i16 %2 to i32
%conv1.i = sext i8 %3 to i32
- %call.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
+ %call.i = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
ret void
; 16_b-LABEL: test:
; 16_h-LABEL: test:
@@ -69,7 +69,7 @@ entry:
store i8 99, i8* %c.i, align 4
store i16* %s.i, i16** @sp, align 4
store i8* %c.i, i8** @cp, align 4
- %call.i.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind
+ %call.i.i.i = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind
%1 = load i16*, i16** @sp, align 4
store i16 32, i16* %1, align 2
%2 = load i8*, i8** @cp, align 4
@@ -78,7 +78,7 @@ entry:
%4 = load i8, i8* %c.i, align 4
%conv.i.i = sext i16 %3 to i32
%conv1.i.i = sext i8 %4 to i32
- %call.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %conv.i.i, i32 %conv1.i.i) nounwind
+ %call.i.i = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %conv.i.i, i32 %conv1.i.i) nounwind
call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind
call void @llvm.lifetime.end(i64 -1, i8* %c.i) nounwind
ret i32 0
diff --git a/test/CodeGen/Mips/stldst.ll b/test/CodeGen/Mips/stldst.ll
index 63e1e14..4eef5ec 100644
--- a/test/CodeGen/Mips/stldst.ll
+++ b/test/CodeGen/Mips/stldst.ll
@@ -29,8 +29,8 @@ entry:
%7 = load i32, i32* @rrrr, align 4
%add6 = add nsw i32 %7, 6
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str, i32 0, i32 0), i32 %sub5, i32 %add6, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) nounwind
- %call7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str, i32 0, i32 0), i32 %0, i32 %1, i32 %add, i32 %add1, i32 %sub, i32 %add2, i32 %add3, i32 %sub4, i32 %sub5, i32 %add6) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str, i32 0, i32 0), i32 %sub5, i32 %add6, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) nounwind
+ %call7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str, i32 0, i32 0), i32 %0, i32 %1, i32 %add, i32 %add1, i32 %sub, i32 %add2, i32 %add3, i32 %sub4, i32 %sub5, i32 %add6) nounwind
ret i32 0
}
; 16: sw ${{[0-9]+}}, {{[0-9]+}} ( $sp ); # 4-byte Folded Spill
diff --git a/test/CodeGen/Mips/sub1.ll b/test/CodeGen/Mips/sub1.ll
index 4c91252..636ab8f 100644
--- a/test/CodeGen/Mips/sub1.ll
+++ b/test/CodeGen/Mips/sub1.ll
@@ -8,7 +8,7 @@ entry:
%0 = load i32, i32* @i, align 4
%sub = sub nsw i32 %0, 5
; 16: addiu ${{[0-9]+}}, -{{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %sub)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %sub)
ret i32 0
}
diff --git a/test/CodeGen/Mips/sub2.ll b/test/CodeGen/Mips/sub2.ll
index e978d45..a97f5e9 100644
--- a/test/CodeGen/Mips/sub2.ll
+++ b/test/CodeGen/Mips/sub2.ll
@@ -10,7 +10,7 @@ entry:
%1 = load i32, i32* @i, align 4
%sub = sub nsw i32 %0, %1
; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %sub)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %sub)
ret i32 0
}
diff --git a/test/CodeGen/Mips/tailcall.ll b/test/CodeGen/Mips/tailcall.ll
index 01b2d73..6a0d64b 100644
--- a/test/CodeGen/Mips/tailcall.ll
+++ b/test/CodeGen/Mips/tailcall.ll
@@ -136,7 +136,7 @@ entry:
; PIC16: jalrc
; PIC16: .end caller8_1
- %call = tail call i32 (i32, ...)* @callee8(i32 2, i32 1) nounwind
+ %call = tail call i32 (i32, ...) @callee8(i32 2, i32 1) nounwind
ret i32 %call
}
@@ -239,7 +239,7 @@ entry:
; PIC16: .ent caller13
; PIC16: jalrc
- %call = tail call i32 (i32, ...)* @callee13(i32 1, i32 2) nounwind
+ %call = tail call i32 (i32, ...) @callee13(i32 1, i32 2) nounwind
ret i32 %call
}
diff --git a/test/CodeGen/Mips/xor1.ll b/test/CodeGen/Mips/xor1.ll
index 4fcfc45..dd51f14 100644
--- a/test/CodeGen/Mips/xor1.ll
+++ b/test/CodeGen/Mips/xor1.ll
@@ -10,7 +10,7 @@ entry:
%1 = load i32, i32* @y, align 4
%xor = xor i32 %0, %1
; 16: xor ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %xor)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %xor)
ret i32 0
}
diff --git a/test/CodeGen/NVPTX/ptx-version-30.ll b/test/CodeGen/NVPTX/ptx-version-30.ll
deleted file mode 100644
index 0422b01..0000000
--- a/test/CodeGen/NVPTX/ptx-version-30.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=ptx30 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=ptx30 | FileCheck %s
-
-
-; CHECK: .version 3.0
-
diff --git a/test/CodeGen/NVPTX/ptx-version-31.ll b/test/CodeGen/NVPTX/ptx-version-31.ll
deleted file mode 100644
index d6e5730..0000000
--- a/test/CodeGen/NVPTX/ptx-version-31.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=ptx31 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=ptx31 | FileCheck %s
-
-
-; CHECK: .version 3.1
-
diff --git a/test/CodeGen/NVPTX/sm-version-30.ll b/test/CodeGen/NVPTX/sm-version-30.ll
index 692b49a..4f35cf0 100644
--- a/test/CodeGen/NVPTX/sm-version-30.ll
+++ b/test/CodeGen/NVPTX/sm-version-30.ll
@@ -2,5 +2,6 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s
+; CHECK: .version 3.2
; CHECK: .target sm_30
diff --git a/test/CodeGen/NVPTX/sm-version-32.ll b/test/CodeGen/NVPTX/sm-version-32.ll
new file mode 100644
index 0000000..d6a5082
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-32.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_32 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_32 | FileCheck %s
+
+
+; CHECK: .version 4.0
+; CHECK: .target sm_32
+
diff --git a/test/CodeGen/NVPTX/sm-version-35.ll b/test/CodeGen/NVPTX/sm-version-35.ll
index 25368a0..8456c66 100644
--- a/test/CodeGen/NVPTX/sm-version-35.ll
+++ b/test/CodeGen/NVPTX/sm-version-35.ll
@@ -2,5 +2,6 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+; CHECK: .version 3.2
; CHECK: .target sm_35
diff --git a/test/CodeGen/NVPTX/sm-version-37.ll b/test/CodeGen/NVPTX/sm-version-37.ll
new file mode 100644
index 0000000..fd51a9c
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-37.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_37 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_37 | FileCheck %s
+
+
+; CHECK: .version 4.1
+; CHECK: .target sm_37
+
diff --git a/test/CodeGen/NVPTX/sm-version-50.ll b/test/CodeGen/NVPTX/sm-version-50.ll
new file mode 100644
index 0000000..374c6ea
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-50.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_50 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_50 | FileCheck %s
+
+
+; CHECK: .version 4.0
+; CHECK: .target sm_50
+
diff --git a/test/CodeGen/NVPTX/sm-version-52.ll b/test/CodeGen/NVPTX/sm-version-52.ll
new file mode 100644
index 0000000..18881b2
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-52.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_52 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_52 | FileCheck %s
+
+
+; CHECK: .version 4.1
+; CHECK: .target sm_52
+
diff --git a/test/CodeGen/NVPTX/sm-version-53.ll b/test/CodeGen/NVPTX/sm-version-53.ll
new file mode 100644
index 0000000..50d2dec
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-53.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_53 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_53 | FileCheck %s
+
+
+; CHECK: .version 4.2
+; CHECK: .target sm_53
+
diff --git a/test/CodeGen/NVPTX/symbol-naming.ll b/test/CodeGen/NVPTX/symbol-naming.ll
index f8e6bf1..0f17693 100644
--- a/test/CodeGen/NVPTX/symbol-naming.ll
+++ b/test/CodeGen/NVPTX/symbol-naming.ll
@@ -24,7 +24,7 @@ target triple = "nvptx64-unknown-unknown"
; Function Attrs: nounwind
define void @foo(i32 %a, float %b, i8 signext %c, i32 %e) {
entry:
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0))
ret void
}
diff --git a/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
index 002a064..5992ad4 100644
--- a/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
+++ b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
@@ -6,7 +6,7 @@ entry:
%tmp = icmp sgt i64 %tmp1, 2 ; <i1> [#uses=1]
br i1 %tmp, label %UnifiedReturnBlock, label %cond_true
cond_true: ; preds = %entry
- %tmp.upgrd.1 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp.upgrd.1 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
ret void
diff --git a/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
index 3d462b4..ab5f37d 100644
--- a/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
+++ b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
@@ -10,7 +10,7 @@ entry:
%tmp = icmp eq i32 %tmp2, 0 ; <i1> [#uses=1]
br i1 %tmp, label %UnifiedReturnBlock, label %cond_true
cond_true: ; preds = %entry
- tail call i32 (...)* @bar( ) ; <i32>:0 [#uses=0]
+ tail call i32 (...) @bar( ) ; <i32>:0 [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
ret void
diff --git a/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
index 6e8b5b4..5a6fbf0 100644
--- a/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
+++ b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
@@ -7,7 +7,7 @@ target triple = "powerpc-apple-darwin8"
define i32 @main() {
entry:
- %tmp = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([18 x i8], [18 x i8]* @str, i32 0, i32 0) ) ; <i32> [#uses=0]
+ %tmp = tail call i32 (i8*, ...) @printf( i8* getelementptr ([18 x i8], [18 x i8]* @str, i32 0, i32 0) ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
index bba3707..b3b7323 100644
--- a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
@@ -22,8 +22,8 @@ entry:
br i1 %toBool, label %cond_true, label %cond_false
cond_true: ; preds = %entry
- %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp3 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
%tmp7 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
%tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
@@ -31,8 +31,8 @@ cond_true: ; preds = %entry
br i1 %toBool10, label %cond_true11, label %cond_false15
cond_false: ; preds = %entry
- %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp5 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
%tmp27 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1]
%tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1]
@@ -40,17 +40,17 @@ cond_false: ; preds = %entry
br i1 %toBool210, label %cond_true11, label %cond_false15
cond_true11: ; preds = %cond_next
- %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_false15: ; preds = %cond_next
- %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp16 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_next18: ; preds = %cond_false15, %cond_true11
- %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp19 = call i32 (...) @bar( ) ; <i32> [#uses=0]
br label %return
return: ; preds = %cond_next18
diff --git a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
index ee1e233..aae914e 100644
--- a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
+++ b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
@@ -9,7 +9,7 @@ entry:
%ttype = alloca i32, align 4 ; <i32*> [#uses=1]
%regs = alloca [1024 x %struct.__db_region], align 16 ; <[1024 x %struct.__db_region]*> [#uses=0]
%tmp = load i32, i32* %ttype, align 4 ; <i32> [#uses=1]
- %tmp1 = call i32 (...)* @bork( i32 %tmp ) ; <i32> [#uses=0]
+ %tmp1 = call i32 (...) @bork( i32 %tmp ) ; <i32> [#uses=0]
ret void
; CHECK: @foo
diff --git a/test/CodeGen/PowerPC/2007-09-08-unaligned.ll b/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
index 341b632..ccbadb4 100644
--- a/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
+++ b/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
@@ -42,7 +42,7 @@ entry:
%tmp4 = getelementptr <{ i8, double }>, <{ i8, double }>* @v, i32 0, i32 1 ; <double*> [#uses=1]
%tmp5 = load double, double* %tmp4, align 1 ; <double> [#uses=1]
%tmp6 = getelementptr [8 x i8], [8 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp7 = call i32 (i8*, ...)* @printf( i8* %tmp6, double %tmp23, double %tmp5 ) ; <i32> [#uses=0]
+ %tmp7 = call i32 (i8*, ...) @printf( i8* %tmp6, double %tmp23, double %tmp5 ) ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
index 09f331f..13b9be3 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
@@ -17,11 +17,11 @@
define %struct.NSManagedObjectContext* @"+[ListGenerator(Private) managedObjectContextWithModelURL:storeURL:]"(%struct.objc_object* %self, %struct._message_ref_t* %_cmd, %struct.NSURL* %modelURL, %struct.NSURL* %storeURL) {
entry:
%storeCoordinator = alloca %struct.NSPersistentStoreCoordinator* ; <%struct.NSPersistentStoreCoordinator**> [#uses=0]
- %tmp29 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2" ) ; <%struct.objc_object*> [#uses=0]
+ %tmp29 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2" ) ; <%struct.objc_object*> [#uses=0]
%tmp34 = load %struct.NSString*, %struct.NSString** @NSXMLStoreType, align 8 ; <%struct.NSString*> [#uses=1]
%tmp37 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_5", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
- %tmp42 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 1 ) ; <%struct.objc_object*> [#uses=1]
- %tmp45 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp37( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_5", %struct.objc_object* %tmp42, %struct.NSString* null ) ; <%struct.objc_object*> [#uses=1]
- %tmp48 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", %struct.NSString* %tmp34, i8* null, %struct.NSURL* null, %struct.objc_object* %tmp45, %struct.NSError** null ) ; <%struct.objc_object*> [#uses=0]
+ %tmp42 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 1 ) ; <%struct.objc_object*> [#uses=1]
+ %tmp45 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) %tmp37( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_5", %struct.objc_object* %tmp42, %struct.NSString* null ) ; <%struct.objc_object*> [#uses=1]
+ %tmp48 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", %struct.NSString* %tmp34, i8* null, %struct.NSURL* null, %struct.objc_object* %tmp45, %struct.NSError** null ) ; <%struct.objc_object*> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
index b27eec4..ff5f835 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
@@ -15,11 +15,11 @@
define %struct.NSManagedObjectContext* @"+[ListGenerator(Private) managedObjectContextWithModelURL:storeURL:]"(%struct.objc_object* %self, %struct._message_ref_t* %_cmd, %struct.NSURL* %modelURL, %struct.NSURL* %storeURL) {
entry:
%tmp27 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
- %tmp29 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp27( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2" ) ; <%struct.objc_object*> [#uses=0]
+ %tmp29 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) %tmp27( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2" ) ; <%struct.objc_object*> [#uses=0]
%tmp33 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
%tmp34 = load %struct.NSString*, %struct.NSString** @NSXMLStoreType, align 8 ; <%struct.NSString*> [#uses=1]
%tmp40 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
- %tmp42 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp40( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 1 ) ; <%struct.objc_object*> [#uses=0]
- %tmp48 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp33( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", %struct.NSString* %tmp34, i8* null, %struct.NSURL* null, %struct.objc_object* null, %struct.NSError** null ) ; <%struct.objc_object*> [#uses=0]
+ %tmp42 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) %tmp40( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 1 ) ; <%struct.objc_object*> [#uses=0]
+ %tmp48 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) %tmp33( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", %struct.NSString* %tmp34, i8* null, %struct.NSURL* null, %struct.objc_object* null, %struct.NSError** null ) ; <%struct.objc_object*> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
index 9158084..45d4399 100644
--- a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
@@ -39,7 +39,7 @@ bb41: ; preds = %bb31
ret i32 0
bb68: ; preds = %bb31
- tail call void (i8*, ...)* @IOLog( i8* getelementptr ([68 x i8], [68 x i8]* @.str34, i32 0, i32 0), i64 %tmp34, i64 0, i32 131072 ) nounwind
+ tail call void (i8*, ...) @IOLog( i8* getelementptr ([68 x i8], [68 x i8]* @.str34, i32 0, i32 0), i64 %tmp34, i64 0, i32 131072 ) nounwind
%tmp2021.i = trunc i64 %Pos.0.reg2mem.0 to i32 ; <i32> [#uses=1]
%tmp202122.i = inttoptr i32 %tmp2021.i to i8* ; <i8*> [#uses=1]
tail call void @IODelay( i32 500 ) nounwind
@@ -55,7 +55,7 @@ bb65.i: ; preds = %bb68
br i1 %tmp67.i, label %_Z24unlock_then_erase_sectory.exit, label %bb70.i
bb70.i: ; preds = %bb65.i
- tail call void (i8*, ...)* @IOLog( i8* getelementptr ([64 x i8], [64 x i8]* @.str19, i32 0, i32 0), i32 %tmp5455.i ) nounwind
+ tail call void (i8*, ...) @IOLog( i8* getelementptr ([64 x i8], [64 x i8]* @.str19, i32 0, i32 0), i32 %tmp5455.i ) nounwind
ret i32 0
_Z24unlock_then_erase_sectory.exit: ; preds = %bb65.i
@@ -66,11 +66,11 @@ _Z24unlock_then_erase_sectory.exit: ; preds = %bb65.i
br i1 %tmp100, label %bb31, label %bb103
bb103: ; preds = %_Z24unlock_then_erase_sectory.exit, %bb
- tail call void (i8*, ...)* @IOLog( i8* getelementptr ([37 x i8], [37 x i8]* @.str35, i32 0, i32 0) ) nounwind
+ tail call void (i8*, ...) @IOLog( i8* getelementptr ([37 x i8], [37 x i8]* @.str35, i32 0, i32 0) ) nounwind
ret i32 0
bb107: ; preds = %entry
- tail call void (i8*, ...)* @IOLog( i8* getelementptr ([48 x i8], [48 x i8]* @.str36, i32 0, i32 0) ) nounwind
+ tail call void (i8*, ...) @IOLog( i8* getelementptr ([48 x i8], [48 x i8]* @.str36, i32 0, i32 0) ) nounwind
%tmp114115 = bitcast i8* %buffer to i16* ; <i16*> [#uses=1]
%tmp256 = lshr i64 %bufferSize, 1 ; <i64> [#uses=1]
%tmp256257 = trunc i64 %tmp256 to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll b/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
index 9de1c7f..ee3d0f4 100644
--- a/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
+++ b/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
@@ -4,7 +4,7 @@
define void @llvm_static_func(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15) nounwind {
entry:
- tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i64 0), i32 %a8 ) nounwind ; <i32>:0 [#uses=0]
+ tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i64 0), i32 %a8 ) nounwind ; <i32>:0 [#uses=0]
ret void
}
diff --git a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
index d98080b..b107600 100644
--- a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
@@ -169,19 +169,19 @@ bb2315: ; preds = %bb2295
bb2317: ; preds = %bb2315
%115 = load i64, i64* %2, align 16 ; <i64> [#uses=1]
- %116 = call i32 (...)* @_u16a_cm( i64 %115, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1]
+ %116 = call i32 (...) @_u16a_cm( i64 %115, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1]
%117 = sext i32 %116 to i64 ; <i64> [#uses=1]
store i64 %117, i64* %2, align 16
%118 = load i64, i64* %3, align 8 ; <i64> [#uses=1]
- %119 = call i32 (...)* @_u16a_cm( i64 %118, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1]
+ %119 = call i32 (...) @_u16a_cm( i64 %118, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1]
%120 = sext i32 %119 to i64 ; <i64> [#uses=1]
store i64 %120, i64* %3, align 8
%121 = load i64, i64* %4, align 16 ; <i64> [#uses=1]
- %122 = call i32 (...)* @_u16a_cm( i64 %121, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1]
+ %122 = call i32 (...) @_u16a_cm( i64 %121, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1]
%123 = sext i32 %122 to i64 ; <i64> [#uses=1]
store i64 %123, i64* %4, align 16
%124 = load i64, i64* %5, align 8 ; <i64> [#uses=1]
- %125 = call i32 (...)* @_u16a_cm( i64 %124, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=0]
+ %125 = call i32 (...) @_u16a_cm( i64 %124, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=0]
unreachable
bb2318: ; preds = %bb2315
@@ -190,29 +190,29 @@ bb2318: ; preds = %bb2315
%128 = load i64, i64* %127, align 8 ; <i64> [#uses=1]
%129 = trunc i64 %128 to i32 ; <i32> [#uses=4]
%130 = load i64, i64* %2, align 16 ; <i64> [#uses=1]
- %131 = call i32 (...)* @_u16_ff( i64 %130, i32 %129 ) nounwind ; <i32> [#uses=1]
+ %131 = call i32 (...) @_u16_ff( i64 %130, i32 %129 ) nounwind ; <i32> [#uses=1]
%132 = sext i32 %131 to i64 ; <i64> [#uses=1]
store i64 %132, i64* %2, align 16
%133 = load i64, i64* %3, align 8 ; <i64> [#uses=1]
- %134 = call i32 (...)* @_u16_ff( i64 %133, i32 %129 ) nounwind ; <i32> [#uses=1]
+ %134 = call i32 (...) @_u16_ff( i64 %133, i32 %129 ) nounwind ; <i32> [#uses=1]
%135 = sext i32 %134 to i64 ; <i64> [#uses=1]
store i64 %135, i64* %3, align 8
%136 = load i64, i64* %4, align 16 ; <i64> [#uses=1]
- %137 = call i32 (...)* @_u16_ff( i64 %136, i32 %129 ) nounwind ; <i32> [#uses=1]
+ %137 = call i32 (...) @_u16_ff( i64 %136, i32 %129 ) nounwind ; <i32> [#uses=1]
%138 = sext i32 %137 to i64 ; <i64> [#uses=1]
store i64 %138, i64* %4, align 16
%139 = load i64, i64* %5, align 8 ; <i64> [#uses=1]
- %140 = call i32 (...)* @_u16_ff( i64 %139, i32 %129 ) nounwind ; <i32> [#uses=0]
+ %140 = call i32 (...) @_u16_ff( i64 %139, i32 %129 ) nounwind ; <i32> [#uses=0]
unreachable
bb2319: ; preds = %bb2326
%141 = getelementptr %struct.CGLSI, %struct.CGLSI* %src, i32 %indvar5021, i32 2 ; <i8**> [#uses=1]
%142 = load i8*, i8** %141, align 4 ; <i8*> [#uses=4]
%143 = getelementptr i8, i8* %142, i32 0 ; <i8*> [#uses=1]
- %144 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %143 ) nounwind ; <i32> [#uses=1]
+ %144 = call i32 (...) @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %143 ) nounwind ; <i32> [#uses=1]
%145 = sext i32 %144 to i64 ; <i64> [#uses=2]
%146 = getelementptr i8, i8* %142, i32 0 ; <i8*> [#uses=1]
- %147 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %146 ) nounwind ; <i32> [#uses=1]
+ %147 = call i32 (...) @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %146 ) nounwind ; <i32> [#uses=1]
%148 = sext i32 %147 to i64 ; <i64> [#uses=2]
%149 = shl i64 %145, 48 ; <i64> [#uses=0]
%150 = shl i64 %148, 32 ; <i64> [#uses=1]
@@ -220,10 +220,10 @@ bb2319: ; preds = %bb2326
store i64 %145, i64* %2, align 16
store i64 %148, i64* %3, align 8
%152 = getelementptr i8, i8* %142, i32 0 ; <i8*> [#uses=1]
- %153 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %152 ) nounwind ; <i32> [#uses=1]
+ %153 = call i32 (...) @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %152 ) nounwind ; <i32> [#uses=1]
%154 = sext i32 %153 to i64 ; <i64> [#uses=0]
%155 = getelementptr i8, i8* %142, i32 0 ; <i8*> [#uses=1]
- %156 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %155 ) nounwind ; <i32> [#uses=0]
+ %156 = call i32 (...) @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %155 ) nounwind ; <i32> [#uses=0]
unreachable
bb2325: ; preds = %bb2326, %bb2295
diff --git a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
index d4972a9..0599b74 100644
--- a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
+++ b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
@@ -12,7 +12,7 @@ entry:
; CHECK: mtctr r12
; CHECK: bctrl
%0 = load void (...)*, void (...)** @p, align 4 ; <void (...)*> [#uses=1]
- call void (...)* %0() nounwind
+ call void (...) %0() nounwind
br label %return
return: ; preds = %entry
diff --git a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
index a488e68..e592091 100644
--- a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
+++ b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
@@ -66,7 +66,7 @@ for.end12: ; preds = %for.end.7, %for.end
%sub14 = sub nsw i64 %call13, %call1
%conv = sitofp i64 %sub14 to double
%div = fdiv double %conv, 1.000000e+06
- %call15 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str152, i64 0, i64 0), double %div) nounwind
+ %call15 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str152, i64 0, i64 0), double %div) nounwind
tail call void @check(i32 1)
ret i32 0
diff --git a/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
index 84c9989..9347682 100644
--- a/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
+++ b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
@@ -204,7 +204,7 @@ for.end23: ; preds = %for.end17
%sub = sub nsw i64 %call24, %call1
%conv25 = sitofp i64 %sub to double
%div = fdiv double %conv25, 1.000000e+06
- %call26 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str235, i64 0, i64 0), double %div) nounwind
+ %call26 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str235, i64 0, i64 0), double %div) nounwind
%add29 = fadd float %add, 1.000000e+00
%add31 = fadd float %add29, %conv18
%add32 = fadd float %add31, 1.000000e+00
diff --git a/test/CodeGen/PowerPC/and-branch.ll b/test/CodeGen/PowerPC/and-branch.ll
index 0484f88..1543205 100644
--- a/test/CodeGen/PowerPC/and-branch.ll
+++ b/test/CodeGen/PowerPC/and-branch.ll
@@ -7,7 +7,7 @@ entry:
%tmp4 = and i1 %tmp3, %tmp ; <i1> [#uses=1]
br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock
cond_true: ; preds = %entry
- %tmp5 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp5 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
ret void
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 9130921..b4b95a2 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=ppc64 | FileCheck %s
-; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=CHECK-P7U
-; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P7U
+; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P8U
define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
; CHECK-LABEL: exchange_and_add:
@@ -12,17 +12,17 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind {
; CHECK-LABEL: exchange_and_add8:
-; CHECK-P7U: lbarx
+; CHECK-P8U: lbarx
%tmp = atomicrmw add i8* %mem, i8 %val monotonic
-; CHECK-P7U: stbcx.
+; CHECK-P8U: stbcx.
ret i8 %tmp
}
define i16 @exchange_and_add16(i16* %mem, i16 %val) nounwind {
; CHECK-LABEL: exchange_and_add16:
-; CHECK-P7U: lharx
+; CHECK-P8U: lharx
%tmp = atomicrmw add i16* %mem, i16 %val monotonic
-; CHECK-P7U: sthcx.
+; CHECK-P8U: sthcx.
ret i16 %tmp
}
@@ -38,21 +38,21 @@ define i64 @exchange_and_cmp(i64* %mem) nounwind {
define i8 @exchange_and_cmp8(i8* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp8:
-; CHECK-P7U: lbarx
+; CHECK-P8U: lbarx
%tmppair = cmpxchg i8* %mem, i8 0, i8 1 monotonic monotonic
%tmp = extractvalue { i8, i1 } %tmppair, 0
-; CHECK-P7U: stbcx.
-; CHECK-P7U: stbcx.
+; CHECK-P8U: stbcx.
+; CHECK-P8U: stbcx.
ret i8 %tmp
}
define i16 @exchange_and_cmp16(i16* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp16:
-; CHECK-P7U: lharx
+; CHECK-P8U: lharx
%tmppair = cmpxchg i16* %mem, i16 0, i16 1 monotonic monotonic
%tmp = extractvalue { i16, i1 } %tmppair, 0
-; CHECK-P7U: sthcx.
-; CHECK-P7U: sthcx.
+; CHECK-P8U: sthcx.
+; CHECK-P8U: sthcx.
ret i16 %tmp
}
@@ -66,17 +66,17 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind {
define i8 @exchange8(i8* %mem, i8 %val) nounwind {
; CHECK-LABEL: exchange8:
-; CHECK-P7U: lbarx
+; CHECK-P8U: lbarx
%tmp = atomicrmw xchg i8* %mem, i8 1 monotonic
-; CHECK-P7U: stbcx.
+; CHECK-P8U: stbcx.
ret i8 %tmp
}
define i16 @exchange16(i16* %mem, i16 %val) nounwind {
; CHECK-LABEL: exchange16:
-; CHECK-P7U: lharx
+; CHECK-P8U: lharx
%tmp = atomicrmw xchg i16* %mem, i16 1 monotonic
-; CHECK-P7U: sthcx.
+; CHECK-P8U: sthcx.
ret i16 %tmp
}
diff --git a/test/CodeGen/PowerPC/branch-opt.ll b/test/CodeGen/PowerPC/branch-opt.ll
index dda1538..d6928dd 100644
--- a/test/CodeGen/PowerPC/branch-opt.ll
+++ b/test/CodeGen/PowerPC/branch-opt.ll
@@ -11,7 +11,7 @@ entry:
br i1 %tmp1.upgrd.1, label %cond_false, label %bb5
bb: ; preds = %bb5, %bb
%indvar77 = phi i32 [ %indvar.next78, %bb ], [ 0, %bb5 ] ; <i32> [#uses=1]
- %tmp2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp2 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
%indvar.next78 = add i32 %indvar77, 1 ; <i32> [#uses=2]
%exitcond79 = icmp eq i32 %indvar.next78, %X ; <i1> [#uses=1]
br i1 %exitcond79, label %cond_next48, label %bb
@@ -24,7 +24,7 @@ cond_false: ; preds = %entry
br i1 %tmp10.upgrd.2, label %cond_false20, label %bb16
bb12: ; preds = %bb16, %bb12
%indvar72 = phi i32 [ %indvar.next73, %bb12 ], [ 0, %bb16 ] ; <i32> [#uses=1]
- %tmp13 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp13 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
%indvar.next73 = add i32 %indvar72, 1 ; <i32> [#uses=2]
%exitcond74 = icmp eq i32 %indvar.next73, %Y ; <i1> [#uses=1]
br i1 %exitcond74, label %cond_next48, label %bb12
@@ -37,7 +37,7 @@ cond_false20: ; preds = %cond_false
br i1 %tmp23.upgrd.3, label %cond_false33, label %bb29
bb25: ; preds = %bb29, %bb25
%indvar67 = phi i32 [ %indvar.next68, %bb25 ], [ 0, %bb29 ] ; <i32> [#uses=1]
- %tmp26 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp26 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
%indvar.next68 = add i32 %indvar67, 1 ; <i32> [#uses=2]
%exitcond69 = icmp eq i32 %indvar.next68, %Z ; <i1> [#uses=1]
br i1 %exitcond69, label %cond_next48, label %bb25
@@ -49,7 +49,7 @@ cond_false33: ; preds = %cond_false20
%tmp36.upgrd.4 = icmp eq i32 %tmp36, 0 ; <i1> [#uses=1]
br i1 %tmp36.upgrd.4, label %cond_next48, label %bb42
bb38: ; preds = %bb42
- %tmp39 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp39 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
br label %bb42
bb42: ; preds = %bb38, %cond_false33
@@ -62,7 +62,7 @@ cond_next48: ; preds = %bb42, %cond_false33, %bb29, %bb25, %bb16, %bb12, %bb5,
%tmp50 = icmp eq i32 %W_addr.1, 0 ; <i1> [#uses=1]
br i1 %tmp50, label %UnifiedReturnBlock, label %cond_true51
cond_true51: ; preds = %cond_next48
- %tmp52 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp52 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %cond_next48
ret void
diff --git a/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll b/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
index 477cf2f..2b3ab9b 100644
--- a/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
+++ b/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
@@ -7,14 +7,14 @@ target triple = "powerpc-unknown-linux"
define void @test(i32 %count) nounwind {
entry:
; CHECK: crxor 6, 6, 6
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
%cmp2 = icmp sgt i32 %count, 0
br i1 %cmp2, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
; CHECK: crxor 6, 6, 6
- %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
+ %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
%inc = add nsw i32 %i.03, 1
%exitcond = icmp eq i32 %inc, %count
br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/CodeGen/PowerPC/cr1eq.ll b/test/CodeGen/PowerPC/cr1eq.ll
index 5ffc1a2..43cd454 100644
--- a/test/CodeGen/PowerPC/cr1eq.ll
+++ b/test/CodeGen/PowerPC/cr1eq.ll
@@ -9,9 +9,9 @@ target triple = "powerpc-unknown-freebsd"
define void @foo() nounwind {
entry:
; CHECK: crxor 6, 6, 6
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 1)
; CHECK: creqv 6, 6, 6
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), double 1.100000e+00)
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), double 1.100000e+00)
ret void
}
diff --git a/test/CodeGen/PowerPC/cr_spilling.ll b/test/CodeGen/PowerPC/cr_spilling.ll
index fdbcf4b..8ac4e72 100644
--- a/test/CodeGen/PowerPC/cr_spilling.ll
+++ b/test/CodeGen/PowerPC/cr_spilling.ll
@@ -10,7 +10,7 @@ entry:
br i1 false, label %cond_true94, label %cond_next99
cond_true94: ; preds = %entry
- %tmp98 = call i32 (i8*, ...)* @printf(i8* getelementptr ([3 x i8], [3 x i8]* @.str242, i32 0, i32 0), i8* null) ; <i32> [#uses=0]
+ %tmp98 = call i32 (i8*, ...) @printf(i8* getelementptr ([3 x i8], [3 x i8]* @.str242, i32 0, i32 0), i8* null) ; <i32> [#uses=0]
%tmp20971 = icmp sgt i32 %tmp86, 0 ; <i1> [#uses=1]
br i1 %tmp20971, label %bb101, label %bb212
diff --git a/test/CodeGen/PowerPC/ctrloop-sums.ll b/test/CodeGen/PowerPC/ctrloop-sums.ll
index 4d8488c..056ee34 100644
--- a/test/CodeGen/PowerPC/ctrloop-sums.ll
+++ b/test/CodeGen/PowerPC/ctrloop-sums.ll
@@ -119,7 +119,7 @@ for.body3.lr.ph.us.i: ; preds = %for.inc17, %for.inc
br label %for.body3.us.i
SumArray.exit: ; preds = %for.inc6.us.i
- %call20 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i64 0, i64 0), i32 100, i32 100, i32 %add.us.i) nounwind
+ %call20 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i64 0, i64 0), i32 100, i32 100, i32 %add.us.i) nounwind
ret i32 0
; CHECK: @main
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
index a64d58f..f09fc4f 100644
--- a/test/CodeGen/PowerPC/dbg.ll
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -17,7 +17,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22}
-!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.1", isOptimized: true, emissionKind: 0, file: !21, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !"")
+!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.1", isOptimized: true, emissionKind: 0, file: !21, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
!5 = !MDSubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !21, scope: null, type: !7, function: i32 (i32, i8**)* @main, variables: !13)
diff --git a/test/CodeGen/PowerPC/div-e-32.ll b/test/CodeGen/PowerPC/div-e-32.ll
new file mode 100644
index 0000000..588756b
--- /dev/null
+++ b/test/CodeGen/PowerPC/div-e-32.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define signext i32 @test1() #0 {
+entry:
+ %0 = call i32 @llvm.ppc.divwe(i32 32, i32 16)
+ ret i32 %0
+; CHECK: divwe 3, 4, 3
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ppc.divwe(i32, i32) #1
+
+; Function Attrs: nounwind
+define signext i32 @test2() #0 {
+entry:
+ %0 = call i32 @llvm.ppc.divweu(i32 32, i32 16)
+ ret i32 %0
+; CHECK: divweu 3, 4, 3
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ppc.divweu(i32, i32) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 231831) (llvm/trunk 231828:231843M)"}
diff --git a/test/CodeGen/PowerPC/div-e-all.ll b/test/CodeGen/PowerPC/div-e-all.ll
new file mode 100644
index 0000000..912deeb
--- /dev/null
+++ b/test/CodeGen/PowerPC/div-e-all.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define signext i32 @test1() #0 {
+entry:
+ %0 = call i32 @llvm.ppc.divwe(i32 32, i32 16)
+ ret i32 %0
+; CHECK: divwe 3, 4, 3
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ppc.divwe(i32, i32) #1
+
+; Function Attrs: nounwind
+define signext i32 @test2() #0 {
+entry:
+ %0 = call i32 @llvm.ppc.divweu(i32 32, i32 16)
+ ret i32 %0
+; CHECK: divweu 3, 4, 3
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ppc.divweu(i32, i32) #1
+
+; Function Attrs: nounwind
+define i64 @test3() #0 {
+entry:
+ %0 = call i64 @llvm.ppc.divde(i64 32, i64 16)
+ ret i64 %0
+; CHECK: divde 3, 4, 3
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.ppc.divde(i64, i64) #1
+
+; Function Attrs: nounwind
+define i64 @test4() #0 {
+entry:
+ %0 = call i64 @llvm.ppc.divdeu(i64 32, i64 16)
+ ret i64 %0
+; CHECK: divdeu 3, 4, 3
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.ppc.divdeu(i64, i64) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 231831) (llvm/trunk 231828:231843M)"}
diff --git a/test/CodeGen/PowerPC/f32-to-i64.ll b/test/CodeGen/PowerPC/f32-to-i64.ll
new file mode 100644
index 0000000..c138188
--- /dev/null
+++ b/test/CodeGen/PowerPC/f32-to-i64.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-p:32:32-i64:64-n32"
+target triple = "powerpc-unknown-unknown"
+
+; Function Attrs: nounwind
+define i64 @testullf(float %arg) #0 {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptoui float %0 to i64
+ ret i64 %conv
+
+; CHECK-LABEL: @testullf
+; CHECK: fctiduz [[REG1:[0-9]+]], 1
+; CHECK: stfd [[REG1]], [[OFF:[0-9]+]](1)
+; CHECK-DAG: lwz 3, [[OFF]](1)
+; CHECK-DAG: lwz 4, {{[0-9]+}}(1)
+; CHECK: blr
+}
+
+attributes #0 = { nounwind "target-cpu"="a2" }
+
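f32-to-i64.ll above checks that on a core without direct GPR/VSR moves (target-cpu a2, 32-bit triple), fptoui from float to i64 is lowered through fctiduz plus a round trip through the stack (stfd followed by two lwz loads). A hedged sketch of the signed variant, which would presumably take the same stack-based path via fctidz, looks like this (the function name is illustrative):

; Sketch only; mirrors the structure of the unsigned test above.
target datalayout = "E-m:e-p:32:32-i64:64-n32"
target triple = "powerpc-unknown-unknown"

define i64 @testsllf(float %arg) #0 {
entry:
  ; Signed float-to-i64: expected to convert in an FPR and pass the
  ; 64-bit result through memory, since a2 has no direct-move instructions.
  %conv = fptosi float %arg to i64
  ret i64 %conv
}

attributes #0 = { nounwind "target-cpu"="a2" }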
diff --git a/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll b/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll
index ab439cd..8a873da 100644
--- a/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll
+++ b/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll
@@ -1,15 +1,12 @@
-;; There are some known limitations in the VSX support during FastIsel
-;; (see fast-isel-load-store.ll header). Nevertheless, we are adding some
-;; regressions here for bugs we fix in the meantime
; RUN: llc < %s -O0 -fast-isel -mattr=+vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64VSX
;; The semantics of VSX stores for when R0 is used is different depending on
;; whether it is used as base or offset. If used as base, the effective
-;; address computation will use zero regardless the content of R0. If used as
-;; offset, the content will be used in the effective address. We observed that
+;; address computation will use zero regardless of the content of R0. If used as
+;; an offset, the content will be used in the effective address. We observed that
;; for some constructors, the initialization values were being stored without
-;; any offset register being specified which was causing R0 to be used as offset
-;; in regions where it contained the value in the link register. This regression
+;; an offset register being specified, which was causing R0 to be used as offset
+;; in regions where it contained the value in the link register. This test
;; verifies that R0 is used as base in these situations.
%SomeStruct = type { double }
@@ -28,6 +25,4 @@ entry:
%0 = load double, double* %V.addr, align 8
store double %0, double* %Val, align 8
ret void
- }
-
-
+ }
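The rewritten comment above concerns the indexed VSX stores (stxsdx and friends): R0 in the base position reads as a literal zero, while in the index position its contents are used. The bug was that FastISel emitted constructor stores with no index register, so R0, which still held a copy of the link register, landed in the index slot. A reduced sketch of the kind of constructor the test exercises (type and function names are illustrative; the RUN line above shows the intended llc invocation):

; Sketch only, to be compiled at -O0 -fast-isel -mattr=+vsx as in the test.
%Pair = type { double }

define void @init(%Pair* %this, double %V) {
entry:
  ; Under FastISel this store becomes an indexed VSX store; the fix keeps
  ; the zero operand in the base position rather than the index position.
  %field = getelementptr inbounds %Pair, %Pair* %this, i32 0, i32 0
  store double %V, double* %field, align 8
  ret void
}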
diff --git a/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll b/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll
new file mode 100644
index 0000000..1d9b648
--- /dev/null
+++ b/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll
@@ -0,0 +1,426 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define zeroext i8 @_Z6testcff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptoui float %0 to i8
+ ret i8 %conv
+; CHECK-LABEL: @_Z6testcff
+; CHECK: xscvdpsxws [[CONVREG01:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG01]]
+}
+
+; Function Attrs: nounwind
+define float @_Z6testfcc(i8 zeroext %arg) {
+entry:
+ %arg.addr = alloca i8, align 1
+ store i8 %arg, i8* %arg.addr, align 1
+ %0 = load i8, i8* %arg.addr, align 1
+ %conv = uitofp i8 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z6testfcc
+; CHECK: mtvsrwz [[MOVEREG01:[0-9]+]], 3
+; FIXME: Once we have XSCVUXDSP implemented, this will change
+; CHECK: fcfidus 1, [[MOVEREG01]]
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @_Z6testcdd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptoui double %0 to i8
+ ret i8 %conv
+; CHECK-LABEL: @_Z6testcdd
+; CHECK: xscvdpsxws [[CONVREG02:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG02]]
+}
+
+; Function Attrs: nounwind
+define double @_Z6testdcc(i8 zeroext %arg) {
+entry:
+ %arg.addr = alloca i8, align 1
+ store i8 %arg, i8* %arg.addr, align 1
+ %0 = load i8, i8* %arg.addr, align 1
+ %conv = uitofp i8 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z6testdcc
+; CHECK: mtvsrwz [[MOVEREG02:[0-9]+]], 3
+; CHECK: xscvuxddp 1, [[MOVEREG02]]
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @_Z7testucff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptoui float %0 to i8
+ ret i8 %conv
+; CHECK-LABEL: @_Z7testucff
+; CHECK: xscvdpsxws [[CONVREG03:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG03]]
+}
+
+; Function Attrs: nounwind
+define float @_Z7testfuch(i8 zeroext %arg) {
+entry:
+ %arg.addr = alloca i8, align 1
+ store i8 %arg, i8* %arg.addr, align 1
+ %0 = load i8, i8* %arg.addr, align 1
+ %conv = uitofp i8 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z7testfuch
+; CHECK: mtvsrwz [[MOVEREG03:[0-9]+]], 3
+; FIXME: Once we have XSCVUXDSP implemented, this will change
+; CHECK: fcfidus 1, [[MOVEREG03]]
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @_Z7testucdd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptoui double %0 to i8
+ ret i8 %conv
+; CHECK-LABEL: @_Z7testucdd
+; CHECK: xscvdpsxws [[CONVREG04:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG04]]
+}
+
+; Function Attrs: nounwind
+define double @_Z7testduch(i8 zeroext %arg) {
+entry:
+ %arg.addr = alloca i8, align 1
+ store i8 %arg, i8* %arg.addr, align 1
+ %0 = load i8, i8* %arg.addr, align 1
+ %conv = uitofp i8 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z7testduch
+; CHECK: mtvsrwz [[MOVEREG04:[0-9]+]], 3
+; CHECK: xscvuxddp 1, [[MOVEREG04]]
+}
+
+; Function Attrs: nounwind
+define signext i16 @_Z6testsff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptosi float %0 to i16
+ ret i16 %conv
+; CHECK-LABEL: @_Z6testsff
+; CHECK: xscvdpsxws [[CONVREG05:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG05]]
+}
+
+; Function Attrs: nounwind
+define float @_Z6testfss(i16 signext %arg) {
+entry:
+ %arg.addr = alloca i16, align 2
+ store i16 %arg, i16* %arg.addr, align 2
+ %0 = load i16, i16* %arg.addr, align 2
+ %conv = sitofp i16 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z6testfss
+; CHECK: mtvsrwa [[MOVEREG05:[0-9]+]], 3
+; FIXME: Once we have XSCVSXDSP implemented, this will change
+; CHECK: fcfids 1, [[MOVEREG05]]
+}
+
+; Function Attrs: nounwind
+define signext i16 @_Z6testsdd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptosi double %0 to i16
+ ret i16 %conv
+; CHECK-LABEL: @_Z6testsdd
+; CHECK: xscvdpsxws [[CONVREG06:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG06]]
+}
+
+; Function Attrs: nounwind
+define double @_Z6testdss(i16 signext %arg) {
+entry:
+ %arg.addr = alloca i16, align 2
+ store i16 %arg, i16* %arg.addr, align 2
+ %0 = load i16, i16* %arg.addr, align 2
+ %conv = sitofp i16 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z6testdss
+; CHECK: mtvsrwa [[MOVEREG06:[0-9]+]], 3
+; CHECK: xscvsxddp 1, [[MOVEREG06]]
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @_Z7testusff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptoui float %0 to i16
+ ret i16 %conv
+; CHECK-LABEL: @_Z7testusff
+; CHECK: xscvdpsxws [[CONVREG07:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG07]]
+}
+
+; Function Attrs: nounwind
+define float @_Z7testfust(i16 zeroext %arg) {
+entry:
+ %arg.addr = alloca i16, align 2
+ store i16 %arg, i16* %arg.addr, align 2
+ %0 = load i16, i16* %arg.addr, align 2
+ %conv = uitofp i16 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z7testfust
+; CHECK: mtvsrwz [[MOVEREG07:[0-9]+]], 3
+; FIXME: Once we have XSCVUXDSP implemented, this will change
+; CHECK: fcfidus 1, [[MOVEREG07]]
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @_Z7testusdd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptoui double %0 to i16
+ ret i16 %conv
+; CHECK-LABEL: @_Z7testusdd
+; CHECK: xscvdpsxws [[CONVREG08:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG08]]
+}
+
+; Function Attrs: nounwind
+define double @_Z7testdust(i16 zeroext %arg) {
+entry:
+ %arg.addr = alloca i16, align 2
+ store i16 %arg, i16* %arg.addr, align 2
+ %0 = load i16, i16* %arg.addr, align 2
+ %conv = uitofp i16 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z7testdust
+; CHECK: mtvsrwz [[MOVEREG08:[0-9]+]], 3
+; CHECK: xscvuxddp 1, [[MOVEREG08]]
+}
+
+; Function Attrs: nounwind
+define signext i32 @_Z6testiff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptosi float %0 to i32
+ ret i32 %conv
+; CHECK-LABEL: @_Z6testiff
+; CHECK: xscvdpsxws [[CONVREG09:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG09]]
+}
+
+; Function Attrs: nounwind
+define float @_Z6testfii(i32 signext %arg) {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, i32* %arg.addr, align 4
+ %0 = load i32, i32* %arg.addr, align 4
+ %conv = sitofp i32 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z6testfii
+; CHECK: mtvsrwa [[MOVEREG09:[0-9]+]], 3
+; FIXME: Once we have XSCVSXDSP implemented, this will change
+; CHECK: fcfids 1, [[MOVEREG09]]
+}
+
+; Function Attrs: nounwind
+define signext i32 @_Z6testidd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptosi double %0 to i32
+ ret i32 %conv
+; CHECK-LABEL: @_Z6testidd
+; CHECK: xscvdpsxws [[CONVREG10:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG10]]
+}
+
+; Function Attrs: nounwind
+define double @_Z6testdii(i32 signext %arg) {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, i32* %arg.addr, align 4
+ %0 = load i32, i32* %arg.addr, align 4
+ %conv = sitofp i32 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z6testdii
+; CHECK: mtvsrwa [[MOVEREG10:[0-9]+]], 3
+; CHECK: xscvsxddp 1, [[MOVEREG10]]
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @_Z7testuiff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptoui float %0 to i32
+ ret i32 %conv
+; CHECK-LABEL: @_Z7testuiff
+; CHECK: xscvdpuxws [[CONVREG11:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG11]]
+}
+
+; Function Attrs: nounwind
+define float @_Z7testfuij(i32 zeroext %arg) {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, i32* %arg.addr, align 4
+ %0 = load i32, i32* %arg.addr, align 4
+ %conv = uitofp i32 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z7testfuij
+; CHECK: mtvsrwz [[MOVEREG11:[0-9]+]], 3
+; FIXME: Once we have XSCVUXDSP implemented, this will change
+; CHECK: fcfidus 1, [[MOVEREG11]]
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @_Z7testuidd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptoui double %0 to i32
+ ret i32 %conv
+; CHECK-LABEL: @_Z7testuidd
+; CHECK: xscvdpuxws [[CONVREG12:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG12]]
+}
+
+; Function Attrs: nounwind
+define double @_Z7testduij(i32 zeroext %arg) {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, i32* %arg.addr, align 4
+ %0 = load i32, i32* %arg.addr, align 4
+ %conv = uitofp i32 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z7testduij
+; CHECK: mtvsrwz [[MOVEREG12:[0-9]+]], 3
+; CHECK: xscvuxddp 1, [[MOVEREG12]]
+}
+
+; Function Attrs: nounwind
+define i64 @_Z7testllff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptosi float %0 to i64
+ ret i64 %conv
+; CHECK-LABEL: @_Z7testllff
+; CHECK: xscvdpsxds [[CONVREG13:[0-9]+]], 1
+; CHECK: mfvsrd 3, [[CONVREG13]]
+}
+
+; Function Attrs: nounwind
+define float @_Z7testfllx(i64 %arg) {
+entry:
+ %arg.addr = alloca i64, align 8
+ store i64 %arg, i64* %arg.addr, align 8
+ %0 = load i64, i64* %arg.addr, align 8
+ %conv = sitofp i64 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z7testfllx
+; CHECK: mtvsrd [[MOVEREG13:[0-9]+]], 3
+; FIXME: Once we have XSCVSXDSP implemented, this will change
+; CHECK: fcfids 1, [[MOVEREG13]]
+}
+
+; Function Attrs: nounwind
+define i64 @_Z7testlldd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptosi double %0 to i64
+ ret i64 %conv
+; CHECK-LABEL: @_Z7testlldd
+; CHECK: xscvdpsxds [[CONVREG14:[0-9]+]], 1
+; CHECK: mfvsrd 3, [[CONVREG14]]
+}
+
+; Function Attrs: nounwind
+define double @_Z7testdllx(i64 %arg) {
+entry:
+ %arg.addr = alloca i64, align 8
+ store i64 %arg, i64* %arg.addr, align 8
+ %0 = load i64, i64* %arg.addr, align 8
+ %conv = sitofp i64 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z7testdllx
+; CHECK: mtvsrd [[MOVEREG14:[0-9]+]], 3
+; CHECK: xscvsxddp 1, [[MOVEREG14]]
+}
+
+; Function Attrs: nounwind
+define i64 @_Z8testullff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptoui float %0 to i64
+ ret i64 %conv
+; CHECK-LABEL: @_Z8testullff
+; CHECK: xscvdpuxds [[CONVREG15:[0-9]+]], 1
+; CHECK: mfvsrd 3, [[CONVREG15]]
+}
+
+; Function Attrs: nounwind
+define float @_Z8testfully(i64 %arg) {
+entry:
+ %arg.addr = alloca i64, align 8
+ store i64 %arg, i64* %arg.addr, align 8
+ %0 = load i64, i64* %arg.addr, align 8
+ %conv = uitofp i64 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z8testfully
+; CHECK: mtvsrd [[MOVEREG15:[0-9]+]], 3
+; FIXME: Once we have XSCVUXDSP implemented, this will change
+; CHECK: fcfidus 1, [[MOVEREG15]]
+}
+
+; Function Attrs: nounwind
+define i64 @_Z8testulldd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptoui double %0 to i64
+ ret i64 %conv
+; CHECK-LABEL: @_Z8testulldd
+; CHECK: xscvdpuxds [[CONVREG16:[0-9]+]], 1
+; CHECK: mfvsrd 3, [[CONVREG16]]
+}
+
+; Function Attrs: nounwind
+define double @_Z8testdully(i64 %arg) {
+entry:
+ %arg.addr = alloca i64, align 8
+ store i64 %arg, i64* %arg.addr, align 8
+ %0 = load i64, i64* %arg.addr, align 8
+ %conv = uitofp i64 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z8testdully
+; CHECK: mtvsrd [[MOVEREG16:[0-9]+]], 3
+; CHECK: xscvuxddp 1, [[MOVEREG16]]
+}
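fp-int-conversions-direct-moves.ll above walks every integer width and signedness against float and double on pwr8, checking that the conversions stay in the vector-scalar unit and cross to or from GPRs with the new direct-move instructions (mfvsrwz/mfvsrd in one direction, mtvsrwa/mtvsrwz/mtvsrd in the other) instead of bouncing through memory. A condensed sketch of the two directions for i32 and double, with the expected instruction pairs copied from the CHECK lines above:

; Sketch only; follows the same pattern as the pwr8 tests above.
define signext i32 @d_to_i32(double %x) {
entry:
  ; Expected: xscvdpsxws followed by mfvsrwz (VSR to GPR direct move).
  %c = fptosi double %x to i32
  ret i32 %c
}

define double @i32_to_d(i32 signext %x) {
entry:
  ; Expected: mtvsrwa (GPR to VSR direct move) followed by xscvsxddp.
  %c = sitofp i32 %x to double
  ret double %c
}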
diff --git a/test/CodeGen/PowerPC/loop-data-prefetch-inner.ll b/test/CodeGen/PowerPC/loop-data-prefetch-inner.ll
new file mode 100644
index 0000000..adcc7b9
--- /dev/null
+++ b/test/CodeGen/PowerPC/loop-data-prefetch-inner.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+; Function Attrs: nounwind
+define void @foo(double* %x, double* nocapture readonly %y) #0 {
+entry:
+ %scevgep = getelementptr double, double* %x, i64 1599
+ %scevgep20 = getelementptr double, double* %y, i64 1599
+ br label %vector.memcheck
+
+vector.memcheck: ; preds = %for.end, %entry
+ %j.015 = phi i32 [ 0, %entry ], [ %inc7, %for.end ]
+ %bound0 = icmp uge double* %scevgep20, %x
+ %bound1 = icmp uge double* %scevgep, %y
+ %memcheck.conflict = and i1 %bound0, %bound1
+ br i1 %memcheck.conflict, label %middle.block, label %vector.body
+
+vector.body: ; preds = %vector.memcheck, %vector.body
+ %index = phi i64 [ %index.next, %vector.body ], [ 0, %vector.memcheck ]
+ %0 = getelementptr inbounds double, double* %y, i64 %index
+ %1 = bitcast double* %0 to <4 x double>*
+ %wide.load = load <4 x double>, <4 x double>* %1, align 8
+ %2 = fadd <4 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %3 = getelementptr inbounds double, double* %x, i64 %index
+ %4 = bitcast double* %3 to <4 x double>*
+ store <4 x double> %2, <4 x double>* %4, align 8
+ %index.next = add i64 %index, 4
+ %5 = icmp eq i64 %index.next, 1600
+ br i1 %5, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body, %vector.memcheck
+ %resume.val = phi i1 [ false, %vector.memcheck ], [ true, %vector.body ]
+ %trunc.resume.val = phi i64 [ 0, %vector.memcheck ], [ 1600, %vector.body ]
+ br i1 %resume.val, label %for.end, label %for.body3
+
+for.body3: ; preds = %middle.block, %for.body3
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ %trunc.resume.val, %middle.block ]
+ %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
+ %6 = load double, double* %arrayidx, align 8
+ %add = fadd double %6, 1.000000e+00
+ %arrayidx5 = getelementptr inbounds double, double* %x, i64 %indvars.iv
+ store double %add, double* %arrayidx5, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.end, label %for.body3
+
+for.end: ; preds = %middle.block, %for.body3
+ tail call void @bar(double* %x) #2
+ %inc7 = add nuw nsw i32 %j.015, 1
+ %exitcond16 = icmp eq i32 %inc7, 100
+ br i1 %exitcond16, label %for.end8, label %vector.memcheck
+
+for.end8: ; preds = %for.end
+ ret void
+
+; CHECK-LABEL: @foo
+; CHECK: dcbt
+}
+
+declare void @bar(double*) #1
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+attributes #1 = { "target-cpu"="a2q" }
+attributes #2 = { nounwind }
+
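loop-data-prefetch-inner.ll above checks that software prefetching still fires when the vectorized loop is nested inside an outer loop; the only hard requirement is the dcbt in the inner body. A simpler, non-nested sketch that would presumably trigger the same dcbt insertion on this target (the function name is illustrative):

; Sketch only; assumes the same a2q prefetching configuration as the test above.
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"

define void @stream(double* nocapture %x, double* nocapture readonly %y) #0 {
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; The prefetcher is expected to emit a dcbt ahead of this streamed load.
  %src = getelementptr inbounds double, double* %y, i64 %i
  %v = load double, double* %src, align 8
  %v1 = fadd double %v, 1.000000e+00
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %v1, double* %dst, align 8
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 1600
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

attributes #0 = { nounwind "target-cpu"="a2q" }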
diff --git a/test/CodeGen/PowerPC/loop-prep-all.ll b/test/CodeGen/PowerPC/loop-prep-all.ll
new file mode 100644
index 0000000..895daff
--- /dev/null
+++ b/test/CodeGen/PowerPC/loop-prep-all.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-bgq-linux < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BGQ
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* nocapture %x, double* nocapture readonly %y) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
+ %0 = load double, double* %arrayidx, align 8
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond19 = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond19, label %for.body7, label %for.body
+
+; CHECK-LABEL: @foo
+
+; CHECK-BGQ-DAG: dcbt 4, 5
+; CHECK-DAG: lfdu [[REG1:[0-9]+]], 8({{[0-9]+}})
+; CHECK-DAG: fadd [[REG2:[0-9]+]], [[REG1]], 0
+; CHECK-DAG: stfdu [[REG2]], 8({{[0-9]+}})
+; CHECK: bdnz
+
+; CHECK: blr
+
+for.cond.cleanup6: ; preds = %for.body7
+ ret void
+
+for.body7: ; preds = %for.body, %for.body7
+ %i3.017 = phi i32 [ %inc9, %for.body7 ], [ 0, %for.body ]
+ tail call void bitcast (void (...)* @bar to void ()*)() #2
+ %inc9 = add nuw nsw i32 %i3.017, 1
+ %exitcond = icmp eq i32 %inc9, 1024
+ br i1 %exitcond, label %for.cond.cleanup6, label %for.body7
+}
+
+declare void @bar(...) #1
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+attributes #1 = { "target-cpu"="a2q" }
+attributes #2 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll
index f31d852..36c5856 100644
--- a/test/CodeGen/PowerPC/mcm-obj-2.ll
+++ b/test/CodeGen/PowerPC/mcm-obj-2.ll
@@ -20,7 +20,7 @@ entry:
; accessing function-scoped variable si.
;
; CHECK: Relocations [
-; CHECK: Section (2) .rela.text {
+; CHECK: Section {{.*}} .rela.text {
; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM2]]
; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll
index 770ef35..46295cf 100644
--- a/test/CodeGen/PowerPC/mcm-obj.ll
+++ b/test/CodeGen/PowerPC/mcm-obj.ll
@@ -22,12 +22,12 @@ entry:
; accessing external variable ei.
;
; MEDIUM: Relocations [
-; MEDIUM: Section (2) .rela.text {
+; MEDIUM: Section {{.*}} .rela.text {
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM1:[^ ]+]]
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM1]]
;
; LARGE: Relocations [
-; LARGE: Section (2) .rela.text {
+; LARGE: Section {{.*}} .rela.text {
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM1:[^ ]+]]
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM1]]
diff --git a/test/CodeGen/PowerPC/memset-nc-le.ll b/test/CodeGen/PowerPC/memset-nc-le.ll
new file mode 100644
index 0000000..af8e9c3
--- /dev/null
+++ b/test/CodeGen/PowerPC/memset-nc-le.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le"
+
+; Function Attrs: nounwind
+define void @test_vsx() unnamed_addr #0 align 2 {
+entry:
+ %0 = load i32, i32* undef, align 4
+ %1 = trunc i32 %0 to i8
+ call void @llvm.memset.p0i8.i64(i8* null, i8 %1, i64 32, i32 1, i1 false)
+ ret void
+
+; CHECK-LABEL: @test_vsx
+; CHECK: stxvd2x
+; CHECK: stxvd2x
+; CHECK: blr
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
+
+attributes #0 = { nounwind "target-cpu"="pwr8" }
+attributes #1 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/memset-nc.ll b/test/CodeGen/PowerPC/memset-nc.ll
new file mode 100644
index 0000000..414a987
--- /dev/null
+++ b/test/CodeGen/PowerPC/memset-nc.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s | FileCheck %s
+; RUN: llc -O0 < %s | FileCheck %s -check-prefix=CHECK-O0
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+; Function Attrs: nounwind
+define void @test_qpx() unnamed_addr #0 align 2 {
+entry:
+ %0 = load i32, i32* undef, align 4
+ %1 = trunc i32 %0 to i8
+ call void @llvm.memset.p0i8.i64(i8* null, i8 %1, i64 64, i32 32, i1 false)
+ ret void
+
+; CHECK-LABEL: @test_qpx
+; CHECK: qvstfdx
+; CHECK: qvstfdx
+; CHECK: blr
+
+; CHECK-O0-LABEL: @test_qpx
+; CHECK-O0-NOT: qvstfdx
+; CHECK-O0: blr
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
+
+; Function Attrs: nounwind
+define void @test_vsx() unnamed_addr #2 align 2 {
+entry:
+ %0 = load i32, i32* undef, align 4
+ %1 = trunc i32 %0 to i8
+ call void @llvm.memset.p0i8.i64(i8* null, i8 %1, i64 32, i32 1, i1 false)
+ ret void
+
+; CHECK-LABEL: @test_vsx
+; CHECK: stxvw4x
+; CHECK: stxvw4x
+; CHECK: blr
+
+; CHECK-O0-LABEL: @test_vsx
+; CHECK-O0-NOT: stxvw4x
+; CHECK-O0: blr
+}
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind "target-cpu"="pwr7" }
+
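The two memset-nc tests above cover the expansion of 32- and 64-byte @llvm.memset calls into wide vector stores (stxvd2x/stxvw4x with VSX on pwr8/pwr7, qvstfdx with QPX on a2q) and confirm that the -O0 pipeline does not use those sequences. The intrinsic carries the five-argument signature with an explicit alignment operand used throughout this tree, as in this hedged sketch (function and parameter names are illustrative):

; Sketch only; same five-argument memset signature as the tests above.
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)

define void @clear32(i8* %p, i8 %val) #0 {
entry:
  ; 32 bytes, align 1, non-volatile; on pwr8 this is expected to expand
  ; to a pair of stxvd2x stores rather than a call to memset.
  call void @llvm.memset.p0i8.i64(i8* %p, i8 %val, i64 32, i32 1, i1 false)
  ret void
}

attributes #0 = { nounwind "target-cpu"="pwr8" }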
diff --git a/test/CodeGen/PowerPC/optnone-crbits-i1-ret.ll b/test/CodeGen/PowerPC/optnone-crbits-i1-ret.ll
new file mode 100644
index 0000000..745a038
--- /dev/null
+++ b/test/CodeGen/PowerPC/optnone-crbits-i1-ret.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+declare zeroext i1 @ri1()
+declare void @se1()
+declare void @se2()
+
+define void @test() #0 {
+entry:
+ %b = call zeroext i1 @ri1()
+ br label %next
+
+; CHECK-LABEL: @test
+; CHECK: bl ri1
+; CHECK-NEXT: nop
+; CHECK: andi. 3, 3, 1
+
+next:
+ br i1 %b, label %case1, label %case2
+
+case1:
+ call void @se1()
+ br label %end
+
+case2:
+ call void @se2()
+ br label %end
+
+end:
+ ret void
+
+; CHECK: blr
+}
+
+attributes #0 = { noinline optnone }
+
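optnone-crbits-i1-ret.ll above makes sure that even at optnone the i1 coming back from @ri1 in r3 is re-tested with andi. before it feeds a CR-bit consumer, rather than assuming a condition register field is still live across the call. A hedged sketch of the same shape with the branch replaced by a select (reusing the @ri1 declaration from the test; the function name is illustrative):

; Sketch only.
declare zeroext i1 @ri1()

define i32 @test_select() #0 {
entry:
  %b = call zeroext i1 @ri1()
  ; The zero-extended boolean returned in r3 is expected to be masked
  ; with andi. before the select is lowered on it.
  %r = select i1 %b, i32 1, i32 2
  ret i32 %r
}

attributes #0 = { noinline optnone }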
diff --git a/test/CodeGen/PowerPC/pip-inner.ll b/test/CodeGen/PowerPC/pip-inner.ll
new file mode 100644
index 0000000..930f0d3
--- /dev/null
+++ b/test/CodeGen/PowerPC/pip-inner.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* %x, double* nocapture readonly %y) #0 {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.end, %entry
+ %i.015 = phi i32 [ 0, %entry ], [ %inc7, %for.end ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
+ %0 = load double, double* %arrayidx, align 8
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx5 = getelementptr inbounds double, double* %x, i64 %indvars.iv
+ store double %add, double* %arrayidx5, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 16000
+ br i1 %exitcond, label %for.end, label %for.body3
+
+for.end: ; preds = %for.body3
+ tail call void @bar(double* %x) #2
+ %inc7 = add nuw nsw i32 %i.015, 1
+ %exitcond16 = icmp eq i32 %inc7, 1000
+ br i1 %exitcond16, label %for.end8, label %for.cond1.preheader
+
+for.end8: ; preds = %for.end
+ ret void
+
+; CHECK-LABEL: @foo
+
+; CHECK: lfdu [[REG1:[0-9]+]], 8({{[0-9]+}})
+; CHECK: fadd [[REG2:[0-9]+]], [[REG1]], {{[0-9]+}}
+; CHECK: stfdu [[REG2]], 8({{[0-9]+}})
+; CHECK: bdnz
+
+; CHECK: bl bar
+; CHECK-NEXT: nop
+
+; CHECK: blr
+}
+
+declare void @bar(double*) #1
+
+attributes #0 = { nounwind "target-cpu"="a2" }
+attributes #1 = { "target-cpu"="a2" }
+attributes #2 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/ppc32-i1-vaarg.ll b/test/CodeGen/PowerPC/ppc32-i1-vaarg.ll
index 6e0aec2..ad8ed38 100644
--- a/test/CodeGen/PowerPC/ppc32-i1-vaarg.ll
+++ b/test/CodeGen/PowerPC/ppc32-i1-vaarg.ll
@@ -5,7 +5,7 @@ target triple = "powerpc-unknown-linux-gnu"
declare void @printf(i8*, ...)
define void @main() {
- call void (i8*, ...)* @printf(i8* undef, i1 false)
+ call void (i8*, ...) @printf(i8* undef, i1 false)
ret void
}
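This one-line change and the long run of printf, stackmap, and patchpoint updates below are the mechanical fallout of the IR syntax change in this LLVM drop: a call to a varargs callee now spells out the full function type rather than a pointer-to-function type. The before/after shape, using a varargs @printf declaration like the ones in these tests:

; Old spelling, as removed by the '-' lines in this patch:
;   %c = call i32 (i8*, ...)* @printf(i8* %fmt, i32 %x)
; New spelling, as added by the '+' lines:
declare i32 @printf(i8*, ...)

define i32 @demo(i8* %fmt, i32 %x) {
entry:
  %c = call i32 (i8*, ...) @printf(i8* %fmt, i32 %x)
  ret i32 %c
}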
diff --git a/test/CodeGen/PowerPC/ppc32-pic-large.ll b/test/CodeGen/PowerPC/ppc32-pic-large.ll
index 4c85ab9..6a06945 100644
--- a/test/CodeGen/PowerPC/ppc32-pic-large.ll
+++ b/test/CodeGen/PowerPC/ppc32-pic-large.ll
@@ -6,7 +6,7 @@ declare i32 @call_foo(i32, ...)
define i32 @foo() {
entry:
%0 = load i32, i32* @bar, align 4
- %call = call i32 (i32, ...)* @call_foo(i32 %0, i32 0, i32 1, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64)
+ %call = call i32 (i32, ...) @call_foo(i32 %0, i32 0, i32 1, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64)
ret i32 %0
}
diff --git a/test/CodeGen/PowerPC/ppc32-pic.ll b/test/CodeGen/PowerPC/ppc32-pic.ll
index 74f9394..63f521c 100644
--- a/test/CodeGen/PowerPC/ppc32-pic.ll
+++ b/test/CodeGen/PowerPC/ppc32-pic.ll
@@ -6,7 +6,7 @@ declare i32 @call_foo(i32, ...)
define i32 @foo() {
entry:
%0 = load i32, i32* @bar, align 4
- %call = call i32 (i32, ...)* @call_foo(i32 %0, i32 0, i32 1, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64)
+ %call = call i32 (i32, ...) @call_foo(i32 %0, i32 0, i32 1, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64)
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll b/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll
index 479c7a7..dfa6ec0 100644
--- a/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll
+++ b/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll
@@ -8,7 +8,7 @@ define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i6
i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32) {
entry:
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32,
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32,
i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16,
i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
diff --git a/test/CodeGen/PowerPC/ppc64-anyregcc.ll b/test/CodeGen/PowerPC/ppc64-anyregcc.ll
index ab2feb6..66f6a2c 100644
--- a/test/CodeGen/PowerPC/ppc64-anyregcc.ll
+++ b/test/CodeGen/PowerPC/ppc64-anyregcc.ll
@@ -82,7 +82,7 @@ target triple = "powerpc64-unknown-linux-gnu"
; CHECK-NEXT: .long 3
define i64 @test() nounwind ssp uwtable {
entry:
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 24, i8* null, i32 2, i32 1, i32 2, i64 3)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 0, i32 24, i8* null, i32 2, i32 1, i32 2, i64 3)
ret i64 0
}
@@ -104,7 +104,7 @@ entry:
define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 24, i8* %f, i32 1, i8* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 1, i32 24, i8* %f, i32 1, i8* %obj)
ret i64 %ret
}
@@ -127,7 +127,7 @@ define i64 @property_access2() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %f, i32 1, i64* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %f, i32 1, i64* %obj)
ret i64 %ret
}
@@ -150,7 +150,7 @@ define i64 @property_access3() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 24, i8* %f, i32 0, i64* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 3, i32 24, i8* %f, i32 0, i64* %obj)
ret i64 %ret
}
@@ -232,7 +232,7 @@ entry:
define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 24, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 4, i32 24, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
@@ -314,7 +314,7 @@ entry:
define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
@@ -342,7 +342,7 @@ entry:
; CHECK-NEXT: .long 0
define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 12, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
  tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
ret i64 %result
@@ -384,7 +384,7 @@ define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
  tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 13, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
ret i64 %result
}
diff --git a/test/CodeGen/PowerPC/ppc64-patchpoint.ll b/test/CodeGen/PowerPC/ppc64-patchpoint.ll
index 48ffb6a..67b2626 100644
--- a/test/CodeGen/PowerPC/ppc64-patchpoint.ll
+++ b/test/CodeGen/PowerPC/ppc64-patchpoint.ll
@@ -28,9 +28,9 @@ entry:
; CHECK: blr
%resolveCall2 = inttoptr i64 244837814094590 to i8*
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
%resolveCall3 = inttoptr i64 244837814094591 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 24, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 24, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
ret i64 %result
}
@@ -51,7 +51,7 @@ entry:
store i64 11, i64* %metadata
store i64 12, i64* %metadata
store i64 13, i64* %metadata
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
ret void
}
@@ -64,14 +64,14 @@ entry:
%tmp80 = add i64 %tmp79, -16
%tmp81 = inttoptr i64 %tmp80 to i64*
%tmp82 = load i64, i64* %tmp81, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
%tmp83 = load i64, i64* %tmp33, align 8
%tmp84 = add i64 %tmp83, -24
%tmp85 = inttoptr i64 %tmp84 to i64*
%tmp86 = load i64, i64* %tmp85, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
ret i64 10
}
@@ -87,7 +87,7 @@ entry:
; CHECK-NEXT: nop
; CHECK-NOT: nop
; CHECK: blr
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
ret void
}
diff --git a/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll b/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll
index 368ddc5..19d65b9 100644
--- a/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll
+++ b/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll
@@ -16,7 +16,7 @@ entry:
; CHECK: mtlr [[REG1]]
; CHECK: blr
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 32)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 32)
ret void
}
diff --git a/test/CodeGen/PowerPC/ppc64-stackmap.ll b/test/CodeGen/PowerPC/ppc64-stackmap.ll
index bc974a0..917fa74 100644
--- a/test/CodeGen/PowerPC/ppc64-stackmap.ll
+++ b/test/CodeGen/PowerPC/ppc64-stackmap.ll
@@ -112,7 +112,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define void @constantargs() {
entry:
%0 = inttoptr i64 244837814094590 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 24, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 24, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
ret void
}
@@ -134,7 +134,7 @@ entry:
; Runtime void->void call.
call void inttoptr (i64 244837814094590 to void ()*)()
; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
ret void
}
@@ -160,7 +160,7 @@ entry:
cold:
; OSR patchpoint with 12-byte nop-slide and 2 live vars.
%thunk = inttoptr i64 244837814094590 to i8*
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4, i32 24, i8* %thunk, i32 0, i64 %a, i64 %b)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4, i32 24, i8* %thunk, i32 0, i64 %a, i64 %b)
unreachable
ret:
ret void
@@ -176,7 +176,7 @@ ret:
define i64 @propertyRead(i64* %obj) {
entry:
%resolveRead = inttoptr i64 244837814094590 to i8*
- %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %resolveRead, i32 1, i64* %obj)
+ %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %resolveRead, i32 1, i64* %obj)
%add = add i64 %result, 3
ret i64 %add
}
@@ -196,7 +196,7 @@ entry:
define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
entry:
%resolveWrite = inttoptr i64 244837814094590 to i8*
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 24, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 24, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
ret void
}
@@ -218,7 +218,7 @@ entry:
define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 244837814094590 to i8*
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 7, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 7, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
ret void
}
@@ -240,7 +240,7 @@ entry:
define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 244837814094590 to i8*
- %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 8, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+ %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 8, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
%add = add i64 %result, 3
ret i64 %add
}
@@ -260,7 +260,7 @@ entry:
; CHECK-NEXT: .short 31
define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) {
entry:
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 11, i32 24, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 24, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
ret void
}
@@ -279,7 +279,7 @@ entry:
; CHECK-NEXT: .short 31
define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29) {
entry:
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
ret void
}
@@ -297,7 +297,7 @@ entry:
; CHECK-NEXT: .long 33
define void @liveConstant() {
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 8, i32 33)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 8, i32 33)
ret void
}
@@ -314,7 +314,7 @@ define void @liveConstant() {
; CHECK-NEXT: .long {{[0-9]+}}
define void @clobberLR(i32 %a) {
tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
ret void
}
diff --git a/test/CodeGen/PowerPC/ppcf128-3.ll b/test/CodeGen/PowerPC/ppcf128-3.ll
index 5043b62..fe3b418 100644
--- a/test/CodeGen/PowerPC/ppcf128-3.ll
+++ b/test/CodeGen/PowerPC/ppcf128-3.ll
@@ -4,28 +4,28 @@
define i32 @stp_sequence_set_short_data(%struct.stp_sequence* %sequence, i32 %count, i16* %data) {
entry:
%tmp1112 = sitofp i16 0 to ppc_fp128 ; <ppc_fp128> [#uses=1]
- %tmp13 = call i32 (...)* @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
define i32 @stp_sequence_set_short_data2(%struct.stp_sequence* %sequence, i32 %count, i16* %data) {
entry:
%tmp1112 = sitofp i8 0 to ppc_fp128 ; <ppc_fp128> [#uses=1]
- %tmp13 = call i32 (...)* @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
define i32 @stp_sequence_set_short_data3(%struct.stp_sequence* %sequence, i32 %count, i16* %data) {
entry:
%tmp1112 = uitofp i16 0 to ppc_fp128 ; <ppc_fp128> [#uses=1]
- %tmp13 = call i32 (...)* @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
define i32 @stp_sequence_set_short_data4(%struct.stp_sequence* %sequence, i32 %count, i16* %data) {
entry:
%tmp1112 = uitofp i8 0 to ppc_fp128 ; <ppc_fp128> [#uses=1]
- %tmp13 = call i32 (...)* @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/remat-imm.ll b/test/CodeGen/PowerPC/remat-imm.ll
index 486495e..ffae8a9 100644
--- a/test/CodeGen/PowerPC/remat-imm.ll
+++ b/test/CodeGen/PowerPC/remat-imm.ll
@@ -9,7 +9,7 @@ define i32 @main() nounwind {
entry:
; CHECK: li 4, 128
; CHECK-NOT: mr 4, {{.*}}
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), i32 128, i32 128) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), i32 128, i32 128) nounwind
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/resolvefi-basereg.ll b/test/CodeGen/PowerPC/resolvefi-basereg.ll
index 2b22f95..a613c33 100644
--- a/test/CodeGen/PowerPC/resolvefi-basereg.ll
+++ b/test/CodeGen/PowerPC/resolvefi-basereg.ll
@@ -340,7 +340,7 @@ if.end: ; preds = %if.then, %entry
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %65, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
%66 = bitcast %struct.S1998* %agg.tmp115 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %66, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
- call void (i32, ...)* @check1998va(i32 signext 1, double 1.000000e+00, %struct.S1998* byval align 16 %agg.tmp113, i64 2, %struct.S1998* byval align 16 %agg.tmp114, %struct.S1998* byval align 16 %agg.tmp115)
+ call void (i32, ...) @check1998va(i32 signext 1, double 1.000000e+00, %struct.S1998* byval align 16 %agg.tmp113, i64 2, %struct.S1998* byval align 16 %agg.tmp114, %struct.S1998* byval align 16 %agg.tmp115)
%67 = bitcast %struct.S1998* %agg.tmp116 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %67, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
%68 = bitcast %struct.S1998* %agg.tmp117 to i8*
@@ -349,7 +349,7 @@ if.end: ; preds = %if.then, %entry
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %69, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
%70 = bitcast %struct.S1998* %agg.tmp119 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %70, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
- call void (i32, ...)* @check1998va(i32 signext 2, %struct.S1998* byval align 16 %agg.tmp116, %struct.S1998* byval align 16 %agg.tmp117, ppc_fp128 0xM40000000000000000000000000000000, %struct.S1998* byval align 16 %agg.tmp118, %struct.S1998* byval align 16 %agg.tmp119)
+ call void (i32, ...) @check1998va(i32 signext 2, %struct.S1998* byval align 16 %agg.tmp116, %struct.S1998* byval align 16 %agg.tmp117, ppc_fp128 0xM40000000000000000000000000000000, %struct.S1998* byval align 16 %agg.tmp118, %struct.S1998* byval align 16 %agg.tmp119)
ret void
}
diff --git a/test/CodeGen/PowerPC/s000-alias-misched.ll b/test/CodeGen/PowerPC/s000-alias-misched.ll
index 3a0c897..2e34c65 100644
--- a/test/CodeGen/PowerPC/s000-alias-misched.ll
+++ b/test/CodeGen/PowerPC/s000-alias-misched.ll
@@ -87,7 +87,7 @@ for.end10: ; preds = %for.end
%sub = sub nsw i64 %call11, %call1
%conv = sitofp i64 %sub to double
%div = fdiv double %conv, 1.000000e+06
- %call12 = tail call signext i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str137, i64 0, i64 0), double %div) nounwind
+ %call12 = tail call signext i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str137, i64 0, i64 0), double %div) nounwind
tail call void @check(i32 signext 1)
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/stack-protector.ll b/test/CodeGen/PowerPC/stack-protector.ll
index 48bfbe6..8d255bd 100644
--- a/test/CodeGen/PowerPC/stack-protector.ll
+++ b/test/CodeGen/PowerPC/stack-protector.ll
@@ -14,7 +14,7 @@ entry:
%0 = load i8*, i8** %a_addr, align 4 ; <i8*> [#uses=1]
%1 = call i8* @strcpy(i8* %buf1, i8* %0) nounwind ; <i8*> [#uses=0]
%buf2 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1]
- %2 = call i32 (i8*, ...)* @printf(i8* getelementptr ([11 x i8], [11 x i8]* @"\01LC", i32 0, i32 0), i8* %buf2) nounwind ; <i32> [#uses=0]
+ %2 = call i32 (i8*, ...) @printf(i8* getelementptr ([11 x i8], [11 x i8]* @"\01LC", i32 0, i32 0), i8* %buf2) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
diff --git a/test/CodeGen/PowerPC/trampoline.ll b/test/CodeGen/PowerPC/trampoline.ll
index bc7bee8..e1a26da 100644
--- a/test/CodeGen/PowerPC/trampoline.ll
+++ b/test/CodeGen/PowerPC/trampoline.ll
@@ -106,7 +106,7 @@ entry:
%27 = load %struct.objc_selector*, %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_1", align 4 ; <%struct.objc_selector*> [#uses=1]
%__block_holder_tmp_1.03 = bitcast %struct.__block_1* %__block_holder_tmp_1.0 to void (%struct.CGImage*)* ; <void (%struct.CGImage*)*> [#uses=1]
%28 = load %struct.objc_object*, %struct.objc_object** %self.1, align 4 ; <%struct.objc_object*> [#uses=1]
- %29 = call %struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)* inttoptr (i64 4294901504 to %struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)*)(%struct.objc_object* %28, %struct.objc_selector* %27, void (%struct.CGImage*)* %__block_holder_tmp_1.03) nounwind ; <%struct.objc_object*> [#uses=0]
+ %29 = call %struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...) inttoptr (i64 4294901504 to %struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)*)(%struct.objc_object* %28, %struct.objc_selector* %27, void (%struct.CGImage*)* %__block_holder_tmp_1.03) nounwind ; <%struct.objc_object*> [#uses=0]
br label %return
return: ; preds = %entry
@@ -155,7 +155,7 @@ entry:
%15 = load %struct.objc_selector*, %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0", align 4 ; <%struct.objc_selector*> [#uses=1]
%16 = load %struct.objc_super*, %struct.objc_super** %objc_super.5, align 4 ; <%struct.objc_super*> [#uses=1]
%17 = load %struct.NSZone*, %struct.NSZone** %zone, align 4 ; <%struct.NSZone*> [#uses=1]
- %18 = call %struct.objc_object* (%struct.objc_super*, %struct.objc_selector*, ...)* @objc_msgSendSuper(%struct.objc_super* %16, %struct.objc_selector* %15, %struct.NSZone* %17) nounwind ; <%struct.objc_object*> [#uses=1]
+ %18 = call %struct.objc_object* (%struct.objc_super*, %struct.objc_selector*, ...) @objc_msgSendSuper(%struct.objc_super* %16, %struct.objc_selector* %15, %struct.NSZone* %17) nounwind ; <%struct.objc_object*> [#uses=1]
%19 = bitcast %struct.objc_object* %18 to %struct.NSBitmapImageRep* ; <%struct.NSBitmapImageRep*> [#uses=1]
%20 = load %struct.NSBitmapImageRep**, %struct.NSBitmapImageRep*** %new, align 4 ; <%struct.NSBitmapImageRep**> [#uses=1]
store %struct.NSBitmapImageRep* %19, %struct.NSBitmapImageRep** %20, align 4
diff --git a/test/CodeGen/PowerPC/varargs-struct-float.ll b/test/CodeGen/PowerPC/varargs-struct-float.ll
index dbdda05..7bb5a34 100644
--- a/test/CodeGen/PowerPC/varargs-struct-float.ll
+++ b/test/CodeGen/PowerPC/varargs-struct-float.ll
@@ -12,7 +12,7 @@ entry:
store float %s.coerce, float* %coerce.dive, align 1
%coerce.dive1 = getelementptr %struct.Sf1, %struct.Sf1* %s, i32 0, i32 0
%0 = load float, float* %coerce.dive1, align 1
- call void (i32, ...)* @testvaSf1(i32 1, float inreg %0)
+ call void (i32, ...) @testvaSf1(i32 1, float inreg %0)
ret void
}
diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll
index 45df814..858b85d 100644
--- a/test/CodeGen/PowerPC/vec_constants.ll
+++ b/test/CodeGen/PowerPC/vec_constants.ll
@@ -1,7 +1,5 @@
-; RUN: llc -O0 -mcpu=pwr7 < %s | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
%tmp = load <4 x i32>, <4 x i32>* %P1 ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vperm-lowering.ll b/test/CodeGen/PowerPC/vperm-lowering.ll
index d55d26c..c78ffdd 100644
--- a/test/CodeGen/PowerPC/vperm-lowering.ll
+++ b/test/CodeGen/PowerPC/vperm-lowering.ll
@@ -9,58 +9,23 @@ define <16 x i8> @foo() nounwind ssp {
}
; CHECK: .LCPI0_0:
-; CHECK: .byte 31
-; CHECK: .byte 26
-; CHECK: .byte 21
-; CHECK: .byte 16
-; CHECK: .byte 11
-; CHECK: .byte 6
-; CHECK: .byte 1
-; CHECK: .byte 28
-; CHECK: .byte 23
-; CHECK: .byte 18
-; CHECK: .byte 13
-; CHECK: .byte 8
-; CHECK: .byte 3
-; CHECK: .byte 30
-; CHECK: .byte 25
-; CHECK: .byte 20
-; CHECK: .LCPI0_1:
; CHECK: .byte 0
-; CHECK: .byte 1
-; CHECK: .byte 2
-; CHECK: .byte 3
-; CHECK: .byte 4
; CHECK: .byte 5
-; CHECK: .byte 6
-; CHECK: .byte 7
-; CHECK: .byte 8
-; CHECK: .byte 9
; CHECK: .byte 10
-; CHECK: .byte 11
-; CHECK: .byte 12
-; CHECK: .byte 13
-; CHECK: .byte 14
; CHECK: .byte 15
-; CHECK: .LCPI0_2:
-; CHECK: .byte 16
-; CHECK: .byte 17
-; CHECK: .byte 18
-; CHECK: .byte 19
; CHECK: .byte 20
-; CHECK: .byte 21
-; CHECK: .byte 22
-; CHECK: .byte 23
-; CHECK: .byte 24
; CHECK: .byte 25
-; CHECK: .byte 26
-; CHECK: .byte 27
-; CHECK: .byte 28
-; CHECK: .byte 29
; CHECK: .byte 30
-; CHECK: .byte 31
+; CHECK: .byte 3
+; CHECK: .byte 8
+; CHECK: .byte 13
+; CHECK: .byte 18
+; CHECK: .byte 23
+; CHECK: .byte 28
+; CHECK: .byte 1
+; CHECK: .byte 6
+; CHECK: .byte 11
; CHECK: foo:
-; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI0_2@toc@ha
-; CHECK: addi [[REG2:[0-9]+]], [[REG1]], .LCPI0_2@toc@l
+; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI0_0@toc@ha
+; CHECK: addi [[REG2:[0-9]+]], [[REG1]], .LCPI0_0@toc@l
; CHECK: lvx [[REG3:[0-9]+]], 0, [[REG2]]
-; CHECK: vperm {{[0-9]+}}, [[REG3]], {{[0-9]+}}, {{[0-9]+}}
diff --git a/test/CodeGen/PowerPC/vsx-fma-m.ll b/test/CodeGen/PowerPC/vsx-fma-m.ll
index 64185a4..d859273 100644
--- a/test/CodeGen/PowerPC/vsx-fma-m.ll
+++ b/test/CodeGen/PowerPC/vsx-fma-m.ll
@@ -98,9 +98,9 @@ entry:
; re-ordering the instructions.
; CHECK-DAG: xsmaddadp [[F1]], 2, 3
-; CHECK-DAG: xsmaddmdp 2, 3, 4
+; CHECK-DAG: xsmaddmdp 3, 2, 4
; CHECK-DAG: stxsdx [[F1]], 0, 8
-; CHECK-DAG: stxsdx 2, 8, [[C1]]
+; CHECK-DAG: stxsdx 3, 8, [[C1]]
; CHECK-DAG: stxsdx 1, 8, [[C2]]
; CHECK-DAG: stxsdx 4, 8, [[C3]]
; CHECK: blr
@@ -269,10 +269,10 @@ entry:
; re-ordering the instructions.
; CHECK-DAG: xvmaddadp [[V1]], 35, 36
-; CHECK-DAG: xvmaddmdp 35, 36, 37
+; CHECK-DAG: xvmaddmdp 36, 35, 37
; CHECK-DAG: xvmaddadp 34, 35, 38
; CHECK-DAG: stxvd2x 32, 0, 3
-; CHECK-DAG: stxvd2x 35, 3, [[C1]]
+; CHECK-DAG: stxvd2x 36, 3, [[C1]]
; CHECK-DAG: stxvd2x 34, 3, [[C2]]
; CHECK-DAG: stxvd2x 37, 3, [[C3]]
; CHECK: blr
diff --git a/test/CodeGen/PowerPC/vsx-spill-norwstore.ll b/test/CodeGen/PowerPC/vsx-spill-norwstore.ll
index c135a00..77b6cb2 100644
--- a/test/CodeGen/PowerPC/vsx-spill-norwstore.ll
+++ b/test/CodeGen/PowerPC/vsx-spill-norwstore.ll
@@ -23,7 +23,7 @@ check.exit69.i: ; preds = %entry
br i1 undef, label %if.then.i63.i, label %check.exit64.i
if.then.i63.i: ; preds = %check.exit69.i
- tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str10, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str1, i64 0, i64 0)) #0
+ tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str10, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str1, i64 0, i64 0)) #0
br label %check.exit64.i
check.exit64.i: ; preds = %if.then.i63.i, %check.exit69.i
diff --git a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
index c381fc4..e7e13d6 100644
--- a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
+++ b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
@@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.barrier.local() #1
; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
-; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
+; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256
; CHECK: s_endpgm
diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/R600/ds_read2.ll
index 7110a90..5929898 100644
--- a/test/CodeGen/R600/ds_read2.ll
+++ b/test/CodeGen/R600/ds_read2.ll
@@ -7,7 +7,7 @@
@lds.f64 = addrspace(3) global [512 x double] undef, align 8
; SI-LABEL: @simple_read2_f32
-; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
+; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:8
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
@@ -26,7 +26,7 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f32_max_offset
-; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
+; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:255
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
@@ -63,7 +63,7 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f32_x2
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
@@ -94,7 +94,7 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
; Make sure there is an instruction between the two sets of reads.
; SI-LABEL: @simple_read2_f32_x2_barrier
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
; SI: s_barrier
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: s_endpgm
@@ -313,7 +313,7 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs
; SI-LABEL: @simple_read2_f64
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
-; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
+; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset1:8
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
@@ -331,7 +331,7 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f64_max_offset
-; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
+; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255
; SI: s_endpgm
define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -366,7 +366,7 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
; Alignment only 4
; SI-LABEL: @misaligned_read2_f64
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
; SI: s_endpgm
define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
@@ -386,7 +386,7 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)
; SI-LABEL: @load_constant_adjacent_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
%val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
%val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
@@ -397,7 +397,7 @@ define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
; SI-LABEL: @load_constant_disjoint_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:2
define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
%val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
%val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
@@ -410,7 +410,7 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
; SI-LABEL: @load_misaligned64_constant_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
%val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
@@ -425,8 +425,8 @@ define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
; SI-LABEL: @load_misaligned64_constant_large_offsets
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000
-; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
-; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
+; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset1:1
+; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset1:1
; SI: s_endpgm
define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
%val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll
index 482debb..54b3b45 100644
--- a/test/CodeGen/R600/ds_read2st64.ll
+++ b/test/CodeGen/R600/ds_read2st64.ll
@@ -5,7 +5,7 @@
; SI-LABEL: @simple_read2st64_f32_0_1
-; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
@@ -117,7 +117,7 @@ define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2st64_f64_0_1
-; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
@@ -158,7 +158,7 @@ define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspac
; Alignment only
; SI-LABEL: @misaligned_read2st64_f64
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
; SI: s_endpgm
define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
@@ -237,7 +237,7 @@ define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double
; SI-LABEL: @byte_size_only_divisible_64_read2_f64
; SI-NOT: ds_read2st_b64
-; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
+; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
; SI: s_endpgm
define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
diff --git a/test/CodeGen/R600/ds_write2.ll b/test/CodeGen/R600/ds_write2.ll
index 1d94c57..b553d34 100644
--- a/test/CodeGen/R600/ds_write2.ll
+++ b/test/CodeGen/R600/ds_write2.ll
@@ -7,7 +7,7 @@
; SI-LABEL: @simple_write2_one_val_f32
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8
; SI: s_endpgm
define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -25,7 +25,7 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -84,7 +84,7 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float
; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -105,7 +105,7 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2
; SI-LABEL: @simple_write2_two_val_subreg2_f32
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -124,7 +124,7 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa
; SI-LABEL: @simple_write2_two_val_subreg4_f32
; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -144,7 +144,7 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
+; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
; SI: s_endpgm
define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -179,7 +179,7 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add
}
; SI-LABEL: @simple_write2_two_val_f32_x2
-; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset1:8
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
@@ -268,7 +268,7 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add
; SI-LABEL: @simple_write2_one_val_f64
; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
-; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
+; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8
; SI: s_endpgm
define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -285,7 +285,7 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace
; SI-LABEL: @misaligned_simple_write2_one_val_f64
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15
; SI: s_endpgm
define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
@@ -304,7 +304,7 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl
; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
-; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
+; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -324,7 +324,7 @@ define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace
; SI-LABEL: @store_constant_adjacent_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
define void @store_constant_adjacent_offsets() {
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
@@ -334,7 +334,7 @@ define void @store_constant_adjacent_offsets() {
; SI-LABEL: @store_constant_disjoint_offsets
; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
+; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset1:2
define void @store_constant_disjoint_offsets() {
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
@@ -345,7 +345,7 @@ define void @store_constant_disjoint_offsets() {
; SI-LABEL: @store_misaligned64_constant_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
define void @store_misaligned64_constant_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
@@ -358,8 +358,8 @@ define void @store_misaligned64_constant_offsets() {
; SI-LABEL: @store_misaligned64_constant_large_offsets
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
-; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
-; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
+; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: s_endpgm
define void @store_misaligned64_constant_large_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
diff --git a/test/CodeGen/R600/ds_write2st64.ll b/test/CodeGen/R600/ds_write2st64.ll
index 2044df2..1d9d881 100644
--- a/test/CodeGen/R600/ds_write2st64.ll
+++ b/test/CodeGen/R600/ds_write2st64.ll
@@ -7,7 +7,7 @@
; SI-LABEL: @simple_write2st64_one_val_f32_0_1
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1
; SI: s_endpgm
define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -46,7 +46,7 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
; SI: s_endpgm
define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -85,7 +85,7 @@ define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, d
; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
; SI-NOT: ds_write2st64_b64
-; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
+; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8
; SI: s_endpgm
define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
diff --git a/test/CodeGen/R600/fmaxnum.ll b/test/CodeGen/R600/fmaxnum.ll
index c105598..3029bd02 100644
--- a/test/CodeGen/R600/fmaxnum.ll
+++ b/test/CodeGen/R600/fmaxnum.ll
@@ -11,6 +11,9 @@ declare double @llvm.maxnum.f64(double, double)
; FUNC-LABEL: @test_fmax_f32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float %b) #0
store float %val, float addrspace(1)* %out, align 4
@@ -20,6 +23,10 @@ define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwin
; FUNC-LABEL: @test_fmax_v2f32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
@@ -31,6 +38,12 @@ define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
@@ -46,6 +59,17 @@ define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
@@ -69,6 +93,27 @@ define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W
define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
@@ -79,6 +124,10 @@ define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a,
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -89,6 +138,11 @@ define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+; EG: 2143289344(nan)
define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -99,6 +153,10 @@ define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -109,6 +167,10 @@ define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -119,6 +181,10 @@ define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -129,6 +195,10 @@ define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -139,6 +209,10 @@ define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -149,6 +223,10 @@ define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -157,6 +235,10 @@ define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
; FUNC-LABEL: @fmax_var_immediate_f32
; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -165,6 +247,9 @@ define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind
; FUNC-LABEL: @fmax_immediate_var_f32
; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
@@ -174,6 +259,9 @@ define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind
; FUNC-LABEL: @fmax_var_literal_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -183,6 +271,9 @@ define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
; FUNC-LABEL: @fmax_literal_var_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/fminnum.ll b/test/CodeGen/R600/fminnum.ll
index 6b93b83..4d7b525 100644
--- a/test/CodeGen/R600/fminnum.ll
+++ b/test/CodeGen/R600/fminnum.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.minnum.f32(float, float) #0
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
@@ -9,6 +10,9 @@ declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #0
; FUNC-LABEL: @test_fmin_f32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%val = call float @llvm.minnum.f32(float %a, float %b) #0
store float %val, float addrspace(1)* %out, align 4
@@ -18,6 +22,10 @@ define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwin
; FUNC-LABEL: @test_fmin_v2f32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) #0
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
@@ -29,6 +37,12 @@ define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) #0
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
@@ -44,6 +58,17 @@ define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b) #0
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
@@ -67,6 +92,27 @@ define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].W
define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b) #0
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
@@ -77,6 +123,10 @@ define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a,
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -87,6 +137,11 @@ define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+; EG: 2143289344({{nan|1\.#QNAN0e\+00}})
define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -97,6 +152,10 @@ define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -107,6 +166,10 @@ define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -117,6 +180,10 @@ define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -127,6 +194,10 @@ define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -137,6 +208,10 @@ define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -147,6 +222,10 @@ define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -155,6 +234,9 @@ define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
; FUNC-LABEL: @fmin_var_immediate_f32
; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -163,6 +245,9 @@ define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind
; FUNC-LABEL: @fmin_immediate_var_f32
; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 2.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
@@ -172,6 +257,9 @@ define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind
; FUNC-LABEL: @fmin_var_literal_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 99.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -181,6 +269,9 @@ define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
; FUNC-LABEL: @fmin_literal_var_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 99.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/ftrunc.f64.ll b/test/CodeGen/R600/ftrunc.f64.ll
index 4ea84a7..6618d8b 100644
--- a/test/CodeGen/R600/ftrunc.f64.ll
+++ b/test/CodeGen/R600/ftrunc.f64.ll
@@ -27,9 +27,9 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: s_lshr_b64
-; SI: cmp_gt_i32
; SI: s_not_b64
; SI: s_and_b64
+; SI: cmp_gt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: cmp_lt_i32
diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll
index caa4b19..06a8b12 100644
--- a/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/test/CodeGen/R600/local-memory-two-objects.ll
@@ -32,8 +32,8 @@
; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]]
-; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]]
-; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] offset:16
+; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16
+; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]]
define void @local_memory_two_objects(i32 addrspace(1)* %out) {
entry:
diff --git a/test/CodeGen/R600/operand-folding.ll b/test/CodeGen/R600/operand-folding.ll
index 4bf748e..816755e 100644
--- a/test/CodeGen/R600/operand-folding.ll
+++ b/test/CodeGen/R600/operand-folding.ll
@@ -19,7 +19,7 @@ endif:
}
; CHECK-LABEL: {{^}}fold_imm:
-; CHECK v_or_i32_e32 v{{[0-9]+}}, 5
+; CHECK: v_or_b32_e32 v{{[0-9]+}}, 5
define void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) {
entry:
%fold = add i32 3, 2
diff --git a/test/CodeGen/R600/si-annotate-cf.ll b/test/CodeGen/R600/si-annotate-cf.ll
new file mode 100644
index 0000000..3f30988
--- /dev/null
+++ b/test/CodeGen/R600/si-annotate-cf.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}break_inserted_outside_of_loop:
+
+; SI: [[LOOP_LABEL:[A-Z0-9]+]]:
+; Lowered break instruction:
+; SI: s_or_b64
+; Lowered Loop instruction:
+; SI: s_andn2_b64
+; s_cbranch_execnz [[LOOP_LABEL]]
+; SI: s_endpgm
+define void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+main_body:
+ %0 = and i32 %a, %b
+ %1 = trunc i32 %0 to i1
+ br label %ENDIF
+
+ENDLOOP:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+
+ENDIF:
+ br i1 %1, label %ENDLOOP, label %ENDIF
+}
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
index efb1de2..82d88eb 100644
--- a/test/CodeGen/R600/unaligned-load-store.ll
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -195,7 +195,7 @@ define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out,
; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
; Test the case where the lo offset fits in 8 bits, but the hi offset needs 9 bits
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
; SI: s_endpgm
define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
@@ -243,7 +243,7 @@ define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
; Test the case where the lo offset fits in 8 bits, but the hi offset needs 9 bits
-; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: s_endpgm
define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
%ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
index c12e9c1..7975ee4 100644
--- a/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
+++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
@@ -7,7 +7,7 @@ module asm "\09.section\09\22.dtors\22,#alloc,#write"
define void @frame_dummy() nounwind {
entry:
- %asmtmp = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0"(void (i8*)* @_Jv_RegisterClasses) nounwind ; <void (i8*)*> [#uses=0]
+ %asmtmp = tail call void (i8*)* (void (i8*)*) asm "", "=r,0"(void (i8*)* @_Jv_RegisterClasses) nounwind ; <void (i8*)*> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/SPARC/2011-01-11-Call.ll b/test/CodeGen/SPARC/2011-01-11-Call.ll
index 067bade..8097e49 100644
--- a/test/CodeGen/SPARC/2011-01-11-Call.ll
+++ b/test/CodeGen/SPARC/2011-01-11-Call.ll
@@ -22,8 +22,8 @@
define void @test() nounwind {
entry:
- %0 = tail call i32 (...)* @foo() nounwind
- tail call void (...)* @bar() nounwind
+ %0 = tail call i32 (...) @foo() nounwind
+ tail call void (...) @bar() nounwind
ret void
}
@@ -48,6 +48,6 @@ declare void @bar(...)
define i32 @test_tail_call_with_return() nounwind {
entry:
- %0 = tail call i32 (...)* @foo() nounwind
+ %0 = tail call i32 (...) @foo() nounwind
ret i32 %0
}
diff --git a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
index 8a3edc6..29bca67 100644
--- a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
+++ b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -66,7 +66,7 @@ entry:
br i1 %0, label %bb, label %bb1
bb: ; preds = %entry
- %1 = tail call i32 (...)* @foo(i32 %a) nounwind
+ %1 = tail call i32 (...) @foo(i32 %a) nounwind
ret i32 %1
bb1: ; preds = %entry
diff --git a/test/CodeGen/SPARC/64abi.ll b/test/CodeGen/SPARC/64abi.ll
index a7e482c..7c08998 100644
--- a/test/CodeGen/SPARC/64abi.ll
+++ b/test/CodeGen/SPARC/64abi.ll
@@ -436,7 +436,7 @@ declare i64 @receive_fp128(i64 %a, ...)
; CHECK: call receive_fp128
define i64 @test_fp128_variable_args(i64 %a, fp128 %b) {
entry:
- %0 = call i64 (i64, ...)* @receive_fp128(i64 %a, fp128 %b)
+ %0 = call i64 (i64, ...) @receive_fp128(i64 %a, fp128 %b)
ret i64 %0
}
diff --git a/test/CodeGen/SPARC/setjmp.ll b/test/CodeGen/SPARC/setjmp.ll
index e75ef96..17519c5 100644
--- a/test/CodeGen/SPARC/setjmp.ll
+++ b/test/CodeGen/SPARC/setjmp.ll
@@ -47,7 +47,7 @@ entry:
bar.exit: ; preds = %entry
%8 = load i32, i32* %0, align 4, !tbaa !4
- %9 = call i32 (i8*, ...)* @printf(i8* noalias getelementptr inbounds ([30 x i8], [30 x i8]* @.cst, i32 0, i32 0), i32 %8) #0
+ %9 = call i32 (i8*, ...) @printf(i8* noalias getelementptr inbounds ([30 x i8], [30 x i8]* @.cst, i32 0, i32 0), i32 %8) #0
ret i32 0
}
diff --git a/test/CodeGen/SPARC/tls.ll b/test/CodeGen/SPARC/tls.ll
index d54cf60..a70637b 100644
--- a/test/CodeGen/SPARC/tls.ll
+++ b/test/CodeGen/SPARC/tls.ll
@@ -99,7 +99,7 @@ entry:
; v9abs-obj: ]
; pic-obj: Relocations [
-; pic-obj: Section (2) .rela.text {
+; pic-obj: Section {{.*}} .rela.text {
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_PC22 _GLOBAL_OFFSET_TABLE_ 0x4
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_PC10 _GLOBAL_OFFSET_TABLE_ 0x8
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDO_HIX22 local_symbol 0x0
diff --git a/test/CodeGen/SPARC/varargs.ll b/test/CodeGen/SPARC/varargs.ll
index 9f18644..c2d1e98 100644
--- a/test/CodeGen/SPARC/varargs.ll
+++ b/test/CodeGen/SPARC/varargs.ll
@@ -71,6 +71,6 @@ declare void @llvm.va_start(i8*)
; CHECK: , %f2
define i32 @call_1d() #0 {
entry:
- %call = call double (i8*, double, ...)* @varargsfunc(i8* undef, double 1.000000e+00, double 2.000000e+00)
+ %call = call double (i8*, double, ...) @varargsfunc(i8* undef, double 1.000000e+00, double 2.000000e+00)
ret i32 1
}
diff --git a/test/CodeGen/SystemZ/ctpop-01.ll b/test/CodeGen/SystemZ/ctpop-01.ll
new file mode 100644
index 0000000..ad80f9f
--- /dev/null
+++ b/test/CodeGen/SystemZ/ctpop-01.ll
@@ -0,0 +1,96 @@
+; Test population-count instruction
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i32 @llvm.ctpop.i32(i32 %a)
+declare i64 @llvm.ctpop.i64(i64 %a)
+
+define i32 @f1(i32 %a) {
+; CHECK-LABEL: f1:
+; CHECK: popcnt %r0, %r2
+; CHECK: sllk %r1, %r0, 16
+; CHECK: ar %r1, %r0
+; CHECK: sllk %r2, %r1, 8
+; CHECK: ar %r2, %r1
+; CHECK: srl %r2, 24
+; CHECK: br %r14
+
+ %popcnt = call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %popcnt
+}
+
+define i32 @f2(i32 %a) {
+; CHECK-LABEL: f2:
+; CHECK: llhr %r0, %r2
+; CHECK: popcnt %r0, %r0
+; CHECK: risblg %r2, %r0, 16, 151, 8
+; CHECK: ar %r2, %r0
+; CHECK: srl %r2, 8
+; CHECK: br %r14
+ %and = and i32 %a, 65535
+ %popcnt = call i32 @llvm.ctpop.i32(i32 %and)
+ ret i32 %popcnt
+}
+
+define i32 @f3(i32 %a) {
+; CHECK-LABEL: f3:
+; CHECK: llcr %r0, %r2
+; CHECK: popcnt %r2, %r0
+; CHECK: br %r14
+ %and = and i32 %a, 255
+ %popcnt = call i32 @llvm.ctpop.i32(i32 %and)
+ ret i32 %popcnt
+}
+
+define i64 @f4(i64 %a) {
+; CHECK-LABEL: f4:
+; CHECK: popcnt %r0, %r2
+; CHECK: sllg %r1, %r0, 32
+; CHECK: agr %r1, %r0
+; CHECK: sllg %r0, %r1, 16
+; CHECK: agr %r0, %r1
+; CHECK: sllg %r1, %r0, 8
+; CHECK: agr %r1, %r0
+; CHECK: srlg %r2, %r1, 56
+; CHECK: br %r14
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %a)
+ ret i64 %popcnt
+}
+
+define i64 @f5(i64 %a) {
+; CHECK-LABEL: f5:
+; CHECK: llgfr %r0, %r2
+; CHECK: popcnt %r0, %r0
+; CHECK: sllg %r1, %r0, 16
+; CHECK: algfr %r0, %r1
+; CHECK: sllg %r1, %r0, 8
+; CHECK: algfr %r0, %r1
+; CHECK: srlg %r2, %r0, 24
+ %and = and i64 %a, 4294967295
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
+ ret i64 %popcnt
+}
+
+define i64 @f6(i64 %a) {
+; CHECK-LABEL: f6:
+; CHECK: llghr %r0, %r2
+; CHECK: popcnt %r0, %r0
+; CHECK: risbg %r1, %r0, 48, 183, 8
+; CHECK: agr %r1, %r0
+; CHECK: srlg %r2, %r1, 8
+; CHECK: br %r14
+ %and = and i64 %a, 65535
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
+ ret i64 %popcnt
+}
+
+define i64 @f7(i64 %a) {
+; CHECK-LABEL: f7:
+; CHECK: llgcr %r0, %r2
+; CHECK: popcnt %r2, %r0
+; CHECK: br %r14
+ %and = and i64 %a, 255
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
+ ret i64 %popcnt
+}
+
diff --git a/test/CodeGen/SystemZ/htm-intrinsics.ll b/test/CodeGen/SystemZ/htm-intrinsics.ll
new file mode 100644
index 0000000..6441ef9
--- /dev/null
+++ b/test/CodeGen/SystemZ/htm-intrinsics.ll
@@ -0,0 +1,352 @@
+; Test transactional-execution intrinsics.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s
+
+declare i32 @llvm.s390.tbegin(i8 *, i32)
+declare i32 @llvm.s390.tbegin.nofloat(i8 *, i32)
+declare void @llvm.s390.tbeginc(i8 *, i32)
+declare i32 @llvm.s390.tend()
+declare void @llvm.s390.tabort(i64)
+declare void @llvm.s390.ntstg(i64, i64 *)
+declare i32 @llvm.s390.etnd()
+declare void @llvm.s390.ppa.txassist(i32)
+
+; TBEGIN.
+define void @test_tbegin() {
+; CHECK-LABEL: test_tbegin:
+; CHECK-NOT: stmg
+; CHECK: std %f8,
+; CHECK: std %f9,
+; CHECK: std %f10,
+; CHECK: std %f11,
+; CHECK: std %f12,
+; CHECK: std %f13,
+; CHECK: std %f14,
+; CHECK: std %f15,
+; CHECK: tbegin 0, 65292
+; CHECK: ld %f8,
+; CHECK: ld %f9,
+; CHECK: ld %f10,
+; CHECK: ld %f11,
+; CHECK: ld %f12,
+; CHECK: ld %f13,
+; CHECK: ld %f14,
+; CHECK: ld %f15,
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin(i8 *null, i32 65292)
+ ret void
+}
+
+; TBEGIN (nofloat).
+define void @test_tbegin_nofloat1() {
+; CHECK-LABEL: test_tbegin_nofloat1:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ ret void
+}
+
+; TBEGIN (nofloat) with integer CC return value.
+define i32 @test_tbegin_nofloat2() {
+; CHECK-LABEL: test_tbegin_nofloat2:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ ret i32 %res
+}
+
+; TBEGIN (nofloat) with implicit CC check.
+define void @test_tbegin_nofloat3(i32 *%ptr) {
+; CHECK-LABEL: test_tbegin_nofloat3:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: jnh {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; TBEGIN (nofloat) with dual CC use.
+define i32 @test_tbegin_nofloat4(i32 %pad, i32 *%ptr) {
+; CHECK-LABEL: test_tbegin_nofloat4:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: cijlh %r2, 2, {{\.L*}}
+; CHECK: mvhi 0(%r3), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 %res
+}
+
+; TBEGIN (nofloat) with register.
+define void @test_tbegin_nofloat5(i8 *%ptr) {
+; CHECK-LABEL: test_tbegin_nofloat5:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0(%r2), 65292
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *%ptr, i32 65292)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0x0f00.
+define void @test_tbegin_nofloat6() {
+; CHECK-LABEL: test_tbegin_nofloat6:
+; CHECK: stmg %r6, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 3840
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 3840)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xf100.
+define void @test_tbegin_nofloat7() {
+; CHECK-LABEL: test_tbegin_nofloat7:
+; CHECK: stmg %r8, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 61696
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 61696)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xfe00 -- stack pointer added automatically.
+define void @test_tbegin_nofloat8() {
+; CHECK-LABEL: test_tbegin_nofloat8:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65280
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65024)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xfb00 -- no frame pointer needed.
+define void @test_tbegin_nofloat9() {
+; CHECK-LABEL: test_tbegin_nofloat9:
+; CHECK: stmg %r10, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 64256
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xfb00 -- frame pointer added automatically.
+define void @test_tbegin_nofloat10(i64 %n) {
+; CHECK-LABEL: test_tbegin_nofloat10:
+; CHECK: stmg %r11, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65280
+; CHECK: br %r14
+ %buf = alloca i8, i64 %n
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256)
+ ret void
+}
+
+; TBEGINC.
+define void @test_tbeginc() {
+; CHECK-LABEL: test_tbeginc:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbeginc 0, 65288
+; CHECK: br %r14
+ call void @llvm.s390.tbeginc(i8 *null, i32 65288)
+ ret void
+}
+
+; TEND with integer CC return value.
+define i32 @test_tend1() {
+; CHECK-LABEL: test_tend1:
+; CHECK: tend
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tend()
+ ret i32 %res
+}
+
+; TEND with implicit CC check.
+define void @test_tend3(i32 *%ptr) {
+; CHECK-LABEL: test_tend3:
+; CHECK: tend
+; CHECK: je {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tend()
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; TEND with dual CC use.
+define i32 @test_tend2(i32 %pad, i32 *%ptr) {
+; CHECK-LABEL: test_tend2:
+; CHECK: tend
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: cijlh %r2, 2, {{\.L*}}
+; CHECK: mvhi 0(%r3), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tend()
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 %res
+}
+
+; TABORT with register only.
+define void @test_tabort1(i64 %val) {
+; CHECK-LABEL: test_tabort1:
+; CHECK: tabort 0(%r2)
+; CHECK: br %r14
+ call void @llvm.s390.tabort(i64 %val)
+ ret void
+}
+
+; TABORT with immediate only.
+define void @test_tabort2(i64 %val) {
+; CHECK-LABEL: test_tabort2:
+; CHECK: tabort 1234
+; CHECK: br %r14
+ call void @llvm.s390.tabort(i64 1234)
+ ret void
+}
+
+; TABORT with register + immediate.
+define void @test_tabort3(i64 %val) {
+; CHECK-LABEL: test_tabort3:
+; CHECK: tabort 1234(%r2)
+; CHECK: br %r14
+ %sum = add i64 %val, 1234
+ call void @llvm.s390.tabort(i64 %sum)
+ ret void
+}
+
+; TABORT with out-of-range immediate.
+define void @test_tabort4(i64 %val) {
+; CHECK-LABEL: test_tabort4:
+; CHECK: tabort 0({{%r[1-5]}})
+; CHECK: br %r14
+ call void @llvm.s390.tabort(i64 4096)
+ ret void
+}
+
+; NTSTG with base pointer only.
+define void @test_ntstg1(i64 *%ptr, i64 %val) {
+; CHECK-LABEL: test_ntstg1:
+; CHECK: ntstg %r3, 0(%r2)
+; CHECK: br %r14
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with base and index.
+; Check that NTSTG allows an index register.
+define void @test_ntstg2(i64 *%base, i64 %index, i64 %val) {
+; CHECK-LABEL: test_ntstg2:
+; CHECK: sllg [[REG:%r[1-5]]], %r3, 3
+; CHECK: ntstg %r4, 0([[REG]],%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 %index
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with the highest in-range displacement.
+define void @test_ntstg3(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg3:
+; CHECK: ntstg %r3, 524280(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 65535
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with an out-of-range positive displacement.
+define void @test_ntstg4(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg4:
+; CHECK: ntstg %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 65536
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with the lowest in-range displacement.
+define void @test_ntstg5(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg5:
+; CHECK: ntstg %r3, -524288(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 -65536
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with an out-of-range negative displacement.
+define void @test_ntstg6(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg6:
+; CHECK: ntstg %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 -65537
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; ETND.
+define i32 @test_etnd() {
+; CHECK-LABEL: test_etnd:
+; CHECK: etnd %r2
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.etnd()
+ ret i32 %res
+}
+
+; PPA (Transaction-Abort Assist).
+define void @test_ppa_txassist(i32 %val) {
+; CHECK-LABEL: test_ppa_txassist:
+; CHECK: ppa %r2, 0, 1
+; CHECK: br %r14
+ call void @llvm.s390.ppa.txassist(i32 %val)
+ ret void
+}
+
diff --git a/test/CodeGen/SystemZ/int-cmp-12.ll b/test/CodeGen/SystemZ/int-cmp-12.ll
index 077b224..d9c6a9f 100644
--- a/test/CodeGen/SystemZ/int-cmp-12.ll
+++ b/test/CodeGen/SystemZ/int-cmp-12.ll
@@ -49,13 +49,24 @@ define double @f4(double %a, double %b, i64 %i1) {
ret double %res
}
-; Check the next value up, which must use a register comparison.
+; Check the next value up, which can use a shifted comparison.
define double @f5(double %a, double %b, i64 %i1) {
; CHECK-LABEL: f5:
-; CHECK: clgrjl %r2,
+; CHECK: srlg [[REG:%r[0-5]]], %r2, 32
+; CHECK: cgije [[REG]], 0
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%cond = icmp ult i64 %i1, 4294967296
%res = select i1 %cond, double %a, double %b
ret double %res
}
+; Check the next value up, which must use a register comparison.
+define double @f6(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f6:
+; CHECK: clgrjl %r2,
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %cond = icmp ult i64 %i1, 4294967297
+ %res = select i1 %cond, double %a, double %b
+ ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-47.ll b/test/CodeGen/SystemZ/int-cmp-47.ll
index 038a25b..274350d 100644
--- a/test/CodeGen/SystemZ/int-cmp-47.ll
+++ b/test/CodeGen/SystemZ/int-cmp-47.ll
@@ -309,7 +309,8 @@ exit:
define void @f17(i64 %a) {
; CHECK-LABEL: f17:
; CHECK-NOT: tmhh
-; CHECK: llihh {{%r[0-5]}}, 49151
+; CHECK: srlg [[REG:%r[0-5]]], %r2, 48
+; CHECK: cgfi [[REG]], 49151
; CHECK-NOT: tmhh
; CHECK: br %r14
entry:
diff --git a/test/CodeGen/SystemZ/int-cmp-50.ll b/test/CodeGen/SystemZ/int-cmp-50.ll
new file mode 100644
index 0000000..287ac2c
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-50.ll
@@ -0,0 +1,30 @@
+; Verify that we do not crash on always-true conditions
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -O0
+;
+; This test was compiled using clang -O0 from the following source code:
+;
+; int test(unsigned long x)
+; {
+; return x >= 0 && x <= 15;
+; }
+
+define signext i32 @test(i64 %x) {
+entry:
+ %x.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ %0 = load i64, i64* %x.addr, align 8
+ %cmp = icmp uge i64 %0, 0
+ br i1 %cmp, label %land.rhs, label %land.end
+
+land.rhs: ; preds = %entry
+ %1 = load i64, i64* %x.addr, align 8
+ %cmp1 = icmp ule i64 %1, 15
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %entry
+ %2 = phi i1 [ false, %entry ], [ %cmp1, %land.rhs ]
+ %land.ext = zext i1 %2 to i32
+ ret i32 %land.ext
+}
+
diff --git a/test/CodeGen/SystemZ/risbg-03.ll b/test/CodeGen/SystemZ/risbg-03.ll
new file mode 100644
index 0000000..c3c08ad
--- /dev/null
+++ b/test/CodeGen/SystemZ/risbg-03.ll
@@ -0,0 +1,30 @@
+; Test use of RISBG vs RISBGN on zEC12.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s
+
+; On zEC12, we generally prefer RISBGN.
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: risbgn %r2, %r3, 60, 62, 0
+; CHECK: br %r14
+ %anda = and i64 %a, -15
+ %andb = and i64 %b, 14
+ %or = or i64 %anda, %andb
+ ret i64 %or
+}
+
+; But we may fall back to RISBG if we can use the condition code.
+define i64 @f2(i64 %a, i64 %b, i32* %c) {
+; CHECK-LABEL: f2:
+; CHECK: risbg %r2, %r3, 60, 62, 0
+; CHECK-NEXT: ipm
+; CHECK: br %r14
+ %anda = and i64 %a, -15
+ %andb = and i64 %b, 14
+ %or = or i64 %anda, %andb
+ %cmp = icmp sgt i64 %or, 0
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, i32* %c, align 4
+ ret i64 %or
+}
+
diff --git a/test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll b/test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll
new file mode 100644
index 0000000..65cc394
--- /dev/null
+++ b/test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=systemz < %s | FileCheck %s
+
+; CHECK-LABEL: tail_memcpy:
+; CHECK: jg memcpy
+define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memmove:
+; CHECK: jg memmove
+define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memset:
+; CHECK: jg memset
+define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll b/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
index 37bcc36..2d2ac9c 100644
--- a/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
+++ b/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
@@ -9,7 +9,7 @@ define void @f1() {
%tmp7 = load i32, i32* %tmp1
%tmp14 = lshr i32 %tmp7, 1
%tmp1415 = and i32 %tmp14, 1
- call void (i32, ...)* @printf( i32 undef, i32 0, i32 %tmp1415 )
+ call void (i32, ...) @printf( i32 undef, i32 0, i32 %tmp1415 )
ret void
}
diff --git a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
index 71fb005..079ab87 100644
--- a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
+++ b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
@@ -25,12 +25,12 @@ entry:
%ret3 = bitcast i32* %ret to i8** ; <i8**> [#uses=2]
%tmp4 = call i32 @pthread_join( i32 %tmp2, i8** %ret3 ) ; <i32> [#uses=0]
%tmp5 = load i32, i32* %ret ; <i32> [#uses=1]
- %tmp7 = call i32 (i8*, ...)* @printf( i8* getelementptr ([14 x i8], [14 x i8]* @.str, i32 0, i32 0), i32 %tmp5 ) ; <i32> [#uses=0]
+ %tmp7 = call i32 (i8*, ...) @printf( i8* getelementptr ([14 x i8], [14 x i8]* @.str, i32 0, i32 0), i32 %tmp5 ) ; <i32> [#uses=0]
%tmp8 = call i32 @pthread_create( i32* %t, %struct.pthread_attr_t* null, i8* (i8*)* @f, i8* null ) ; <i32> [#uses=0]
%tmp9 = load i32, i32* %t ; <i32> [#uses=1]
%tmp11 = call i32 @pthread_join( i32 %tmp9, i8** %ret3 ) ; <i32> [#uses=0]
%tmp12 = load i32, i32* %ret ; <i32> [#uses=1]
- %tmp14 = call i32 (i8*, ...)* @printf( i8* getelementptr ([14 x i8], [14 x i8]* @.str1, i32 0, i32 0), i32 %tmp12 ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (i8*, ...) @printf( i8* getelementptr ([14 x i8], [14 x i8]* @.str1, i32 0, i32 0), i32 %tmp12 ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index 58b15c8..9235830 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -150,6 +150,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!97 = !MDLocation(line: 227, scope: !94, inlinedAt: !96)
!98 = !MDLocation(line: 52, scope: !1)
!101 = !MDFile(filename: "ggEdgeDiscrepancy.cc", directory: "/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src")
-!102 = !{i32 0}
+!102 = !{}
!103 = !{!3, !77}
!104 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
index ff574c2..d8e1651 100644
--- a/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
+++ b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
@@ -48,7 +48,7 @@ do.body: ; preds = %entry
%tmp20 = bitcast %struct.RRRRRRRR* %agg.tmp16 to i8*
%tmp21 = bitcast %struct.RRRRRRRR* %arrayidx19 to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp20, i8* %tmp21, i32 312, i32 4, i1 false)
- call void (i8*, i32, i8*, i8*, ...)* @CLLoggingLog(i8* %tmp, i32 2, i8* getelementptr inbounds ([62 x i8], [62 x i8]* @__PRETTY_FUNCTION__._ZN12CLGll, i32 0, i32 0), i8* getelementptr inbounds ([75 x i8], [75 x i8]* @.str, i32 0, i32 0), %struct.RRRRRRRR* byval %agg.tmp, %struct.RRRRRRRR* byval %agg.tmp4, %struct.RRRRRRRR* byval %agg.tmp10, %struct.RRRRRRRR* byval %agg.tmp16)
+ call void (i8*, i32, i8*, i8*, ...) @CLLoggingLog(i8* %tmp, i32 2, i8* getelementptr inbounds ([62 x i8], [62 x i8]* @__PRETTY_FUNCTION__._ZN12CLGll, i32 0, i32 0), i8* getelementptr inbounds ([75 x i8], [75 x i8]* @.str, i32 0, i32 0), %struct.RRRRRRRR* byval %agg.tmp, %struct.RRRRRRRR* byval %agg.tmp4, %struct.RRRRRRRR* byval %agg.tmp10, %struct.RRRRRRRR* byval %agg.tmp16)
br label %do.end
do.end: ; preds = %do.body
diff --git a/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll b/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll
index d39a760..accb82c 100644
--- a/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll
+++ b/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll
@@ -16,7 +16,7 @@ declare i8* @f2(i8*, i8*, ...)
define internal void @f(i8* %self, i8* %_cmd, %0* %inObjects, %0* %inIndexes) optsize ssp {
entry:
- %call14 = tail call i8* (i8*, i8*, ...)* (i8*, i8*)* @f1(i8* undef, i8* %_cmd) optsize
+ %call14 = tail call i8* (i8*, i8*, ...)* (i8*, i8*) @f1(i8* undef, i8* %_cmd) optsize
%0 = bitcast i8* (i8*, i8*, ...)* %call14 to void (i8*, i8*, %0*, %0*)*
tail call void %0(i8* %self, i8* %_cmd, %0* %inObjects, %0* %inIndexes) optsize
tail call void bitcast (i8* (i8*, i8*, ...)* @f2 to void (i8*, i8*, i32, %0*, %0*)*)(i8* %self, i8* undef, i32 2, %0* %inIndexes, %0* undef) optsize
diff --git a/test/CodeGen/Thumb/asmprinter-bug.ll b/test/CodeGen/Thumb/asmprinter-bug.ll
index 0024d08..e12fcb1 100644
--- a/test/CodeGen/Thumb/asmprinter-bug.ll
+++ b/test/CodeGen/Thumb/asmprinter-bug.ll
@@ -250,7 +250,7 @@ entry:
br label %bb
bb: ; preds = %bb3, %entry
- %0 = tail call i32 (...)* @read(i32 0, i8* getelementptr ([500 x i8], [500 x i8]* @abuf, i32 0, i32 0), i32 500) nounwind ; <i32> [#uses=4]
+ %0 = tail call i32 (...) @read(i32 0, i8* getelementptr ([500 x i8], [500 x i8]* @abuf, i32 0, i32 0), i32 500) nounwind ; <i32> [#uses=4]
%1 = icmp slt i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %bb1, label %bb2
@@ -266,7 +266,7 @@ bb3: ; preds = %bb2
%3 = shl i32 %0, 1 ; <i32> [#uses=1]
tail call void @adpcm_decoder(i8* getelementptr ([500 x i8], [500 x i8]* @abuf, i32 0, i32 0), i16* getelementptr ([1000 x i16], [1000 x i16]* @sbuf, i32 0, i32 0), i32 %3, %struct.adpcm_state* @state) nounwind
%4 = shl i32 %0, 2 ; <i32> [#uses=1]
- %5 = tail call i32 (...)* @write(i32 1, i16* getelementptr ([1000 x i16], [1000 x i16]* @sbuf, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
+ %5 = tail call i32 (...) @write(i32 1, i16* getelementptr ([1000 x i16], [1000 x i16]* @sbuf, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
br label %bb
bb4: ; preds = %bb2
@@ -275,7 +275,7 @@ bb4: ; preds = %bb2
%8 = sext i16 %7 to i32 ; <i32> [#uses=1]
%9 = load i8, i8* getelementptr (%struct.adpcm_state, %struct.adpcm_state* @state, i32 0, i32 1), align 2 ; <i8> [#uses=1]
%10 = sext i8 %9 to i32 ; <i32> [#uses=1]
- %11 = tail call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %6, i8* getelementptr ([28 x i8], [28 x i8]* @.str1, i32 0, i32 0), i32 %8, i32 %10) nounwind ; <i32> [#uses=0]
+ %11 = tail call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* %6, i8* getelementptr ([28 x i8], [28 x i8]* @.str1, i32 0, i32 0), i32 %8, i32 %10) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll
index 7137429..1c7b631 100644
--- a/test/CodeGen/Thumb/vargs.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -27,7 +27,7 @@ bb: ; preds = %bb, %entry
bb7: ; preds = %bb
%tmp3 = bitcast i8* %tmp to i32* ; <i32*> [#uses=1]
%tmp.upgrd.3 = load i32, i32* %tmp3 ; <i32> [#uses=1]
- %tmp10 = call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @str, i32 0, i64 0), i32 %tmp.upgrd.3 ) ; <i32> [#uses=0]
+ %tmp10 = call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @str, i32 0, i64 0), i32 %tmp.upgrd.3 ) ; <i32> [#uses=0]
%va.upgrd.4 = bitcast i8** %va to i8* ; <i8*> [#uses=1]
call void @llvm.va_end( i8* %va.upgrd.4 )
ret void
diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
index 0dc04e0..e363a34 100644
--- a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
+++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -24,7 +24,7 @@ entry:
%15 = sext i8 %6 to i32 ; <i32> [#uses=2]
%16 = sext i16 %10 to i32 ; <i32> [#uses=2]
%17 = sext i16 %13 to i32 ; <i32> [#uses=2]
- %18 = call i32 (i8*, ...)* @printf(i8* getelementptr ([36 x i8], [36 x i8]* @"\01LC", i32 0, i32 0), i32 -128, i32 0, i32 %15, i32 %16, i32 %17, i32 0, i32 %14) nounwind ; <i32> [#uses=0]
+ %18 = call i32 (i8*, ...) @printf(i8* getelementptr ([36 x i8], [36 x i8]* @"\01LC", i32 0, i32 0), i32 -128, i32 0, i32 %15, i32 %16, i32 %17, i32 0, i32 %14) nounwind ; <i32> [#uses=0]
%19 = add i32 0, %15 ; <i32> [#uses=1]
%20 = add i32 %19, %16 ; <i32> [#uses=1]
%21 = add i32 %20, %14 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll b/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
index e980bdb..b75a14b 100644
--- a/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
@@ -17,7 +17,7 @@ bb1: ; preds = %entry
bb2: ; preds = %bb1
%0 = call i8* @llvm.frameaddress(i32 0) ; <i8*> [#uses=1]
- %1 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* noalias undef, i8* noalias getelementptr ([30 x i8], [30 x i8]* @.str2, i32 0, i32 0), i8* %0, i8* null) nounwind ; <i32> [#uses=0]
+ %1 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* noalias undef, i8* noalias getelementptr ([30 x i8], [30 x i8]* @.str2, i32 0, i32 0), i8* %0, i8* null) nounwind ; <i32> [#uses=0]
unreachable
bb9: ; preds = %bb1
diff --git a/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
index a670782..ccec979 100644
--- a/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
@@ -67,22 +67,22 @@ FontSize.exit: ; preds = %bb.i1, %FontHalfXHeight.exit
br i1 %2, label %bb.i5, label %FontName.exit
bb.i5: ; preds = %FontSize.exit
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8], [20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8], [10 x i8]* @.str81872, i32 0, i32 0)) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 1, i32 2, i8* getelementptr ([20 x i8], [20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8], [10 x i8]* @.str81872, i32 0, i32 0)) nounwind
br label %FontName.exit
FontName.exit: ; preds = %bb.i5, %FontSize.exit
- %3 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8], [8 x i8]* @.str1822946, i32 0, i32 0), i32 %1, i8* undef) nounwind ; <i32> [#uses=0]
+ %3 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8], [8 x i8]* @.str1822946, i32 0, i32 0), i32 %1, i8* undef) nounwind ; <i32> [#uses=0]
%4 = call i32 @"\01_fwrite"(i8* getelementptr ([11 x i8], [11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind ; <i32> [#uses=0]
%5 = sub i32 %colmark, undef ; <i32> [#uses=1]
%6 = sub i32 %rowmark, undef ; <i32> [#uses=1]
%7 = load %struct.FILE*, %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
- %8 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %7, i8* getelementptr ([17 x i8], [17 x i8]* @.str212784, i32 0, i32 0), i32 %5, i32 %6) nounwind ; <i32> [#uses=0]
+ %8 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* %7, i8* getelementptr ([17 x i8], [17 x i8]* @.str212784, i32 0, i32 0), i32 %5, i32 %6) nounwind ; <i32> [#uses=0]
store i32 0, i32* @cpexists, align 4
%9 = getelementptr %struct.rec, %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 1 ; <i32*> [#uses=1]
%10 = load i32, i32* %9, align 4 ; <i32> [#uses=1]
%11 = sub i32 0, %10 ; <i32> [#uses=1]
%12 = load %struct.FILE*, %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
- %13 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %12, i8* getelementptr ([17 x i8], [17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %11) nounwind ; <i32> [#uses=0]
+ %13 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* %12, i8* getelementptr ([17 x i8], [17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %11) nounwind ; <i32> [#uses=0]
store i32 0, i32* @cpexists, align 4
br label %bb100.outer.outer
diff --git a/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
index 0f277e4..89f47d9 100644
--- a/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
@@ -94,7 +94,7 @@ bb1: ; preds = %bb, %entry
br i1 %8, label %bb2, label %bb3
bb2: ; preds = %bb1
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8], [20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([40 x i8], [40 x i8]* @.str1802944, i32 0, i32 0)) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 1, i32 2, i8* getelementptr ([20 x i8], [20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([40 x i8], [40 x i8]* @.str1802944, i32 0, i32 0)) nounwind
br label %bb3
bb3: ; preds = %bb2, %bb1
@@ -124,7 +124,7 @@ bb9: ; preds = %bb8
br i1 %15, label %bb.i, label %FontHalfXHeight.exit
bb.i: ; preds = %bb9
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8], [20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([17 x i8], [17 x i8]* @.str111875, i32 0, i32 0)) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 1, i32 2, i8* getelementptr ([20 x i8], [20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([17 x i8], [17 x i8]* @.str111875, i32 0, i32 0)) nounwind
%.pre186 = load i32, i32* @currentfont, align 4 ; <i32> [#uses=1]
br label %FontHalfXHeight.exit
@@ -139,7 +139,7 @@ bb1.i: ; preds = %bb.i1, %FontHalfXHeight.exit
br i1 undef, label %bb2.i, label %FontSize.exit
bb2.i: ; preds = %bb1.i
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 37, i32 61, i8* getelementptr ([30 x i8], [30 x i8]* @.str101874, i32 0, i32 0), i32 1, %struct.FILE_POS* null) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 37, i32 61, i8* getelementptr ([30 x i8], [30 x i8]* @.str101874, i32 0, i32 0), i32 1, %struct.FILE_POS* null) nounwind
unreachable
FontSize.exit: ; preds = %bb1.i
@@ -151,33 +151,33 @@ FontSize.exit: ; preds = %bb1.i
br i1 %21, label %bb.i5, label %FontName.exit
bb.i5: ; preds = %FontSize.exit
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8], [20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8], [10 x i8]* @.str81872, i32 0, i32 0)) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 1, i32 2, i8* getelementptr ([20 x i8], [20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8], [10 x i8]* @.str81872, i32 0, i32 0)) nounwind
br label %FontName.exit
FontName.exit: ; preds = %bb.i5, %FontSize.exit
%22 = phi %struct.FONT_INFO* [ undef, %bb.i5 ], [ undef, %FontSize.exit ] ; <%struct.FONT_INFO*> [#uses=1]
%23 = getelementptr %struct.FONT_INFO, %struct.FONT_INFO* %22, i32 %19, i32 5 ; <%struct.rec**> [#uses=0]
- %24 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8], [8 x i8]* @.str1822946, i32 0, i32 0), i32 %18, i8* null) nounwind ; <i32> [#uses=0]
+ %24 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8], [8 x i8]* @.str1822946, i32 0, i32 0), i32 %18, i8* null) nounwind ; <i32> [#uses=0]
br label %bb10
bb10: ; preds = %FontName.exit, %bb8
%25 = call i32 @"\01_fwrite"(i8* getelementptr ([11 x i8], [11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind ; <i32> [#uses=0]
%26 = sub i32 %rowmark, undef ; <i32> [#uses=1]
%27 = load %struct.FILE*, %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
- %28 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %27, i8* getelementptr ([17 x i8], [17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %26) nounwind ; <i32> [#uses=0]
+ %28 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* %27, i8* getelementptr ([17 x i8], [17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %26) nounwind ; <i32> [#uses=0]
store i32 0, i32* @cpexists, align 4
- %29 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([17 x i8], [17 x i8]* @.str192782, i32 0, i32 0), double 2.000000e+01, double 2.000000e+01) nounwind ; <i32> [#uses=0]
+ %29 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* undef, i8* getelementptr ([17 x i8], [17 x i8]* @.str192782, i32 0, i32 0), double 2.000000e+01, double 2.000000e+01) nounwind ; <i32> [#uses=0]
%30 = getelementptr %struct.rec, %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
%31 = load i32, i32* %30, align 4 ; <i32> [#uses=1]
%32 = sub i32 0, %31 ; <i32> [#uses=1]
%33 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
%34 = sub i32 0, %33 ; <i32> [#uses=1]
%35 = load %struct.FILE*, %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
- %36 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %35, i8* getelementptr ([17 x i8], [17 x i8]* @.str212784, i32 0, i32 0), i32 %32, i32 %34) nounwind ; <i32> [#uses=0]
+ %36 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* %35, i8* getelementptr ([17 x i8], [17 x i8]* @.str212784, i32 0, i32 0), i32 %32, i32 %34) nounwind ; <i32> [#uses=0]
store i32 0, i32* @cpexists, align 4
%37 = load %struct.rec*, %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
%38 = getelementptr %struct.rec, %struct.rec* %37, i32 0, i32 0, i32 4 ; <%struct.FOURTH_UNION*> [#uses=1]
- %39 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([23 x i8], [23 x i8]* @.str1852949, i32 0, i32 0), %struct.FOURTH_UNION* %38) nounwind ; <i32> [#uses=0]
+ %39 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* undef, i8* getelementptr ([23 x i8], [23 x i8]* @.str1852949, i32 0, i32 0), %struct.FOURTH_UNION* %38) nounwind ; <i32> [#uses=0]
%buff14 = getelementptr [512 x i8], [512 x i8]* %buff, i32 0, i32 0 ; <i8*> [#uses=5]
%40 = call i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=0]
%iftmp.506.0 = select i1 undef, i32 2, i32 0 ; <i32> [#uses=1]
@@ -245,7 +245,7 @@ bb.i56: ; preds = %bb27
br i1 undef, label %bb1.i58, label %bb2.i60
bb1.i58: ; preds = %bb.i56
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 31, i32 1, i8* getelementptr ([32 x i8], [32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 31, i32 1, i8* getelementptr ([32 x i8], [32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
br label %bb2.i60
bb2.i60: ; preds = %bb1.i58, %bb.i56
@@ -385,7 +385,7 @@ bb.i2: ; preds = %bb69
br i1 undef, label %bb1.i3, label %bb2.i4
bb1.i3: ; preds = %bb.i2
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 31, i32 1, i8* getelementptr ([32 x i8], [32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 31, i32 1, i8* getelementptr ([32 x i8], [32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
br label %bb2.i4
bb2.i4: ; preds = %bb1.i3, %bb.i2
@@ -502,7 +502,7 @@ bb102: ; preds = %bb101.split
bb103: ; preds = %bb101.split
%99 = load %struct.FILE*, %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
- %100 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %99, i8* getelementptr ([26 x i8], [26 x i8]* @.str1932957, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ %100 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* %99, i8* getelementptr ([26 x i8], [26 x i8]* @.str1932957, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
store i32 0, i32* @wordcount, align 4
ret void
}
diff --git a/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll b/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
index 13cf135..04dcb9d 100644
--- a/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
+++ b/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
@@ -18,9 +18,9 @@ declare i32 @printf(i8* nocapture, ...) nounwind
define i32 @main() nounwind {
entry:
- %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8], [31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0]
- %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8], [31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0]
- %2 = tail call i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1]
- %3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+ %0 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([31 x i8], [31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0]
+ %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([31 x i8], [31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0]
+ %2 = tail call i32 (i32, ...) @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1]
+ %3 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
index aa61c28..e59e84d 100644
--- a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
+++ b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -47,7 +47,7 @@ entry:
store i8* %bp, i8** %bp_addr
%0 = load i8*, i8** %in_addr, align 4 ; <i8*> [#uses=1]
store i8* %0, i8** %out, align 4
- %1 = call i32 (...)* @foo() nounwind ; <i32> [#uses=1]
+ %1 = call i32 (...) @foo() nounwind ; <i32> [#uses=1]
store i32 %1, i32* %i, align 4
%2 = load i32, i32* %three_by_three_addr, align 4 ; <i32> [#uses=1]
%3 = icmp eq i32 %2, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
index 735e724..24a995a 100644
--- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -32,10 +32,10 @@ entry:
%tmp7 = extractelement <2 x double> %5, i32 0 ; <double> [#uses=1]
%tmp5 = extractelement <2 x double> %5, i32 1 ; <double> [#uses=1]
; CHECK: printf
- %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), double %tmp7, double %tmp5) nounwind ; <i32> [#uses=0]
+ %7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), double %tmp7, double %tmp5) nounwind ; <i32> [#uses=0]
%tmp3 = extractelement <2 x double> %6, i32 0 ; <double> [#uses=1]
%tmp1 = extractelement <2 x double> %6, i32 1 ; <double> [#uses=1]
- %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), double %tmp3, double %tmp1) nounwind ; <i32> [#uses=0]
+ %8 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), double %tmp3, double %tmp1) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
index 1efbe4c..3b14d22 100644
--- a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
+++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -15,7 +15,7 @@ entry:
bb: ; preds = %entry
%1 = alloca [1000 x i8], align 4 ; <[1000 x i8]*> [#uses=1]
%.sub = getelementptr inbounds [1000 x i8], [1000 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2]
- %2 = call i32 (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0]
+ %2 = call i32 (i8*, i32, i32, i8*, ...) @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0]
%3 = load i8, i8* %.sub, align 4 ; <i8> [#uses=1]
%4 = sext i8 %3 to i32 ; <i32> [#uses=1]
ret i32 %4
@@ -52,7 +52,7 @@ bb2: ; preds = %bb
; CHECK-NOT: mov sp, r7
; CHECK-NOT: sub sp, #12
; CHECK: pop
- %4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+ %4 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll
index b273a89..80a842d 100644
--- a/test/CodeGen/Thumb2/div.ll
+++ b/test/CodeGen/Thumb2/div.ll
@@ -4,6 +4,10 @@
; RUN: | FileCheck %s -check-prefix=CHECK-THUMBV7M
; RUN: llc -mtriple=thumb-apple-darwin -mcpu=swift %s -o - \
; RUN: | FileCheck %s -check-prefix=CHECK-HWDIV
+; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-r4 %s -o - \
+; RUN: | FileCheck %s -check-prefix=CHECK-HWDIV
+; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-r4f %s -o - \
+; RUN: | FileCheck %s -check-prefix=CHECK-HWDIV
; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-r5 %s -o - \
; RUN: | FileCheck %s -check-prefix=CHECK-HWDIV
diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll
index ca94980..f6a5a60 100644
--- a/test/CodeGen/Thumb2/large-call.ll
+++ b/test/CodeGen/Thumb2/large-call.ll
@@ -22,7 +22,7 @@ entry:
%d = alloca double, align 8
store double 1.000000e+00, double* %d, align 8
%0 = load double, double* %d, align 8
- call void (i8*, i8*, i8*, ...)* @variadic(i8* null, i8* null, i8* null, i32 1, double 1.234800e+03, double 2.363450e+03, double %0, i32 1, double 1.234560e+03, double 2.345670e+03, double 4.6334563e+03, double 2.423440e+03, double 4.234330e+03, double 2.965430e+03, i32 1, double 4.669300e+03, double 2.927500e+03, double 4.663100e+03, double 2.921000e+03, double 4.663100e+03, double 2.345100e+03, i32 1, double 3.663100e+03, double 2.905100e+03, double 4.669300e+03, double 2.898600e+03, double 4.676900e+03, double 2.898600e+03, i32 1, double 4.684600e+03, double 2.898600e+03, double 1.234800e+03, double 2.905100e+03, double 1.234800e+03, double 2.345100e+03, i32 1, double 7.719700e+03, double 2.920500e+03, double 4.713500e+03, double 2.927000e+03, double 4.705800e+03, double 2.927000e+03, i32 1, double 8.698200e+03, double 2.927000e+03, double 4.692000e+03, double 2.920500e+03, double 4.692000e+03, double 2.912500e+03, i32 1, double 4.692000e+03, double 2.945600e+03, double 4.698200e+03, double 2.898100e+03, double 4.705800e+03, double 2.898100e+03, i32 1, double 4.713500e+03, double 2.898100e+03, double 4.719700e+03, double 2.945600e+03, double 4.719700e+03, double 2.912500e+03, i32 1, double 4.749200e+03, double 2.920100e+03, double 4.743000e+03, double 2.926600e+03, double 4.735300e+03, double 2.926600e+03, i32 1, double 4.727700e+03, double 2.926600e+03, double 4.721500e+03, double 2.920100e+03, double 4.721500e+03, double 2.912100e+03, i32 1, double 4.721500e+03, double 2.945100e+03, double 4.727700e+03, double 2.897700e+03, double 4.735300e+03, double 2.897700e+03, i32 1, double 4.743000e+03, double 2.897700e+03, double 4.749200e+03, double 2.945100e+03, double 4.749200e+03, double 2.912100e+03, i32 1, double 4.778200e+03, double 2.920100e+03, double 4.772000e+03, double 2.926600e+03, double 4.764300e+03, double 2.926600e+03, i32 1, double 4.756700e+03, double 2.926600e+03, double 4.750500e+03, double 2.920100e+03, double 4.750500e+03, double 2.912100e+03, i32 1, double 4.750500e+03, double 2.945100e+03, double 4.756700e+03, double 2.897700e+03, double 4.764300e+03, double 2.897700e+03, i32 1, double 4.772000e+03, double 2.897700e+03, double 4.778200e+03, double 2.945100e+03, double 4.778200e+03, double 2.912100e+03, i32 1, double 4.801900e+03, double 2.942100e+03, double 4.795700e+03, double 2.948500e+03, double 4.788100e+03, double 2.948500e+03, i32 1, double 4.780500e+03, double 2.948500e+03, double 4.774300e+03, double 2.942100e+03, double 4.774300e+03, double 2.934100e+03, i32 1, double 4.774300e+03, double 2.926100e+03, double 4.780500e+03, double 2.919600e+03, double 4.788100e+03, double 2.919600e+03, i32 1, double 4.795700e+03, double 2.919600e+03, double 4.801900e+03, double 2.926100e+03, double 4.801900e+03, double 2.934100e+03, i32 1, double 4.801500e+03, double 2.972500e+03, double 4.795300e+03, double 2.978900e+03, double 4.787700e+03, double 2.978900e+03, i32 1, double 4.780000e+03, double 2.978900e+03, double 4.773800e+03, double 2.972500e+03, double 4.773800e+03, double 2.964500e+03, i32 1, double 4.773800e+03, double 2.956500e+03, double 4.780000e+03, double 2.950000e+03, double 4.787700e+03, double 2.950000e+03, i32 1, double 4.795300e+03, double 2.950000e+03, double 4.801500e+03, double 2.956500e+03, double 4.801500e+03, double 2.964500e+03, i32 1, double 4.802400e+03, double 3.010200e+03, double 4.796200e+03, double 3.016600e+03, double 4.788500e+03, double 3.016600e+03, i32 1, double 4.780900e+03, double 3.016600e+03, double 4.774700e+03, double 3.010200e+03, 
double 4.774700e+03, double 3.002200e+03, i32 1, double 4.774700e+03, double 2.994200e+03, double 4.780900e+03, double 2.987700e+03, double 4.788500e+03, double 2.987700e+03, i32 1, double 4.796200e+03, double 2.987700e+03, double 4.802400e+03, double 2.994200e+03, double 4.802400e+03, double 3.002200e+03, i32 1, double 4.802400e+03, double 3.039400e+03, double 4.796200e+03, double 3.455800e+03, double 4.788500e+03, double 3.455800e+03, i32 1, double 4.780900e+03, double 3.455800e+03, double 4.774700e+03, double 3.039400e+03, double 4.774700e+03, double 3.031400e+03, i32 1, double 4.774700e+03, double 3.023400e+03, double 4.780900e+03, double 3.016900e+03, double 4.788500e+03, double 3.016900e+03, i32 1, double 4.796200e+03, double 3.016900e+03, double 4.802400e+03, double 3.023400e+03, double 4.802400e+03, double 3.031400e+03, i32 1, double 4.778600e+03, double 3.063100e+03, double 4.772400e+03, double 3.069600e+03, double 4.764700e+03, double 3.069600e+03, i32 1, double 4.757100e+03, double 3.069600e+03, double 4.750900e+03, double 3.063100e+03, double 4.750900e+03, double 3.055100e+03, i32 1, double 4.750900e+03, double 3.457100e+03, double 4.757100e+03, double 3.450700e+03, double 4.764700e+03, double 3.450700e+03, i32 1, double 4.772400e+03, double 3.450700e+03, double 4.778600e+03, double 3.457100e+03, double 4.778600e+03, double 3.055100e+03, i32 1, double 4.748600e+03, double 3.063600e+03, double 4.742400e+03, double 3.070000e+03, double 4.734700e+03, double 3.070000e+03, i32 1, double 4.727100e+03, double 3.070000e+03, double 4.720900e+03, double 3.063600e+03, double 4.720900e+03, double 3.055600e+03, i32 1, double 4.720900e+03, double 3.457600e+03, double 4.727100e+03, double 3.451100e+03, double 4.734700e+03, double 3.451100e+03, i32 1, double 4.742400e+03, double 3.451100e+03, double 4.748600e+03, double 3.457600e+03, double 4.748600e+03, double 3.055600e+03, i32 1, double 4.719500e+03, double 3.063600e+03, double 4.713300e+03, double 3.070000e+03, double 4.705700e+03, double 3.070000e+03, i32 1, double 4.698000e+03, double 3.070000e+03, double 4.691900e+03, double 3.063600e+03, double 4.691900e+03, double 3.055600e+03, i32 1, double 4.691900e+03, double 3.457600e+03, double 4.698000e+03, double 3.451100e+03, double 4.705700e+03, double 3.451100e+03, i32 1, double 4.713300e+03, double 3.451100e+03, double 4.719500e+03, double 3.457600e+03, double 4.719500e+03, double 3.055600e+03, i32 1, double 4.691300e+03, double 3.064000e+03, double 4.685100e+03, double 3.070500e+03, double 4.677500e+03, double 3.070500e+03, i32 1, double 4.669900e+03, double 3.070500e+03, double 4.663700e+03, double 3.064000e+03, double 4.663700e+03, double 3.056000e+03, i32 1, double 4.663700e+03, double 3.458000e+03, double 4.669900e+03, double 3.451600e+03, double 4.677500e+03, double 3.451600e+03, i32 1, double 4.685100e+03, double 3.451600e+03, double 4.691300e+03, double 3.458000e+03, double 4.691300e+03, double 3.056000e+03, i32 1, double 4.668500e+03, double 3.453000e+03, double 4.662300e+03, double 3.459400e+03, double 4.654700e+03, double 3.459400e+03, i32 1, double 4.647000e+03, double 3.459400e+03, double 4.640900e+03, double 3.453000e+03, double 4.640900e+03, double 3.035000e+03, i32 1, double 4.640900e+03, double 3.027000e+03, double 4.647000e+03, double 3.020500e+03, double 4.654700e+03, double 3.020500e+03, i32 1, double 4.662300e+03, double 3.020500e+03, double 4.668500e+03, double 3.027000e+03, double 4.668500e+03, double 3.035000e+03, i32 1, double 4.668500e+03, double 3.014300e+03, 
double 4.662300e+03, double 3.020800e+03, double 4.654700e+03, double 3.020800e+03, i32 1, double 4.647000e+03, double 3.020800e+03, double 4.640900e+03, double 3.014300e+03, double 4.640900e+03, double 3.006400e+03, i32 1, double 4.640900e+03, double 2.998400e+03, double 4.647000e+03, double 2.991900e+03, double 4.654700e+03, double 2.991900e+03, i32 1, double 4.662300e+03, double 2.991900e+03, double 4.668500e+03, double 2.998400e+03, double 4.668500e+03, double 3.006400e+03, i32 1, double 4.668100e+03, double 2.941100e+03, double 4.661900e+03, double 2.947600e+03, double 4.654200e+03, double 2.947600e+03, i32 1, double 4.646600e+03, double 2.947600e+03, double 4.640400e+03, double 2.941100e+03, double 4.640400e+03, double 2.933100e+03, i32 1, double 4.640400e+03, double 2.925200e+03, double 4.646600e+03, double 2.918700e+03, double 4.654200e+03, double 2.918700e+03, i32 1, double 4.661900e+03, double 2.918700e+03, double 4.668100e+03, double 2.925200e+03, double 4.668100e+03, double 2.933100e+03, i32 1, double 4.668500e+03, double 2.971600e+03, double 4.662300e+03, double 2.978100e+03, double 4.654700e+03, double 2.978100e+03, i32 1, double 4.647000e+03, double 2.978100e+03, double 4.640900e+03, double 2.971600e+03, double 4.640900e+03, double 2.963600e+03, i32 1, double 4.640900e+03, double 2.955700e+03, double 4.647000e+03, double 2.949200e+03, double 4.654700e+03, double 2.949200e+03, i32 1, double 4.662300e+03, double 2.949200e+03, double 4.668500e+03, double 2.955700e+03, double 4.668500e+03, double 2.963600e+03, i32 2, i32 1, double 4.691300e+03, double 3.056000e+03, i32 2, i32 1, double 4.748600e+03, double 3.055600e+03, i32 2, i32 1, double 4.778200e+03, double 2.912100e+03, i32 2, i32 1, double 4.749200e+03, double 2.912100e+03, i32 2, i32 1, double 4.802400e+03, double 3.031400e+03, i32 2, i32 1, double 4.778600e+03, double 3.055100e+03, i32 2, i32 1, double 4.801500e+03, double 2.964500e+03, i32 2, i32 1, double 4.802400e+03, double 3.002200e+03, i32 2, i32 1, double 4.719700e+03, double 2.912500e+03, i32 2, i32 1, double 4.801900e+03, double 2.934100e+03, i32 2, i32 1, double 4.719500e+03, double 3.055600e+03, i32 2, i32 1, double 4.668500e+03, double 3.006400e+03, i32 2, i32 1, double 4.668500e+03, double 3.035000e+03, i32 2, i32 1, double 4.668100e+03, double 2.933100e+03, i32 2, i32 1, double 4.668500e+03, double 2.963600e+03, i32 2, i32 48)
+ call void (i8*, i8*, i8*, ...) @variadic(i8* null, i8* null, i8* null, i32 1, double 1.234800e+03, double 2.363450e+03, double %0, i32 1, double 1.234560e+03, double 2.345670e+03, double 4.6334563e+03, double 2.423440e+03, double 4.234330e+03, double 2.965430e+03, i32 1, double 4.669300e+03, double 2.927500e+03, double 4.663100e+03, double 2.921000e+03, double 4.663100e+03, double 2.345100e+03, i32 1, double 3.663100e+03, double 2.905100e+03, double 4.669300e+03, double 2.898600e+03, double 4.676900e+03, double 2.898600e+03, i32 1, double 4.684600e+03, double 2.898600e+03, double 1.234800e+03, double 2.905100e+03, double 1.234800e+03, double 2.345100e+03, i32 1, double 7.719700e+03, double 2.920500e+03, double 4.713500e+03, double 2.927000e+03, double 4.705800e+03, double 2.927000e+03, i32 1, double 8.698200e+03, double 2.927000e+03, double 4.692000e+03, double 2.920500e+03, double 4.692000e+03, double 2.912500e+03, i32 1, double 4.692000e+03, double 2.945600e+03, double 4.698200e+03, double 2.898100e+03, double 4.705800e+03, double 2.898100e+03, i32 1, double 4.713500e+03, double 2.898100e+03, double 4.719700e+03, double 2.945600e+03, double 4.719700e+03, double 2.912500e+03, i32 1, double 4.749200e+03, double 2.920100e+03, double 4.743000e+03, double 2.926600e+03, double 4.735300e+03, double 2.926600e+03, i32 1, double 4.727700e+03, double 2.926600e+03, double 4.721500e+03, double 2.920100e+03, double 4.721500e+03, double 2.912100e+03, i32 1, double 4.721500e+03, double 2.945100e+03, double 4.727700e+03, double 2.897700e+03, double 4.735300e+03, double 2.897700e+03, i32 1, double 4.743000e+03, double 2.897700e+03, double 4.749200e+03, double 2.945100e+03, double 4.749200e+03, double 2.912100e+03, i32 1, double 4.778200e+03, double 2.920100e+03, double 4.772000e+03, double 2.926600e+03, double 4.764300e+03, double 2.926600e+03, i32 1, double 4.756700e+03, double 2.926600e+03, double 4.750500e+03, double 2.920100e+03, double 4.750500e+03, double 2.912100e+03, i32 1, double 4.750500e+03, double 2.945100e+03, double 4.756700e+03, double 2.897700e+03, double 4.764300e+03, double 2.897700e+03, i32 1, double 4.772000e+03, double 2.897700e+03, double 4.778200e+03, double 2.945100e+03, double 4.778200e+03, double 2.912100e+03, i32 1, double 4.801900e+03, double 2.942100e+03, double 4.795700e+03, double 2.948500e+03, double 4.788100e+03, double 2.948500e+03, i32 1, double 4.780500e+03, double 2.948500e+03, double 4.774300e+03, double 2.942100e+03, double 4.774300e+03, double 2.934100e+03, i32 1, double 4.774300e+03, double 2.926100e+03, double 4.780500e+03, double 2.919600e+03, double 4.788100e+03, double 2.919600e+03, i32 1, double 4.795700e+03, double 2.919600e+03, double 4.801900e+03, double 2.926100e+03, double 4.801900e+03, double 2.934100e+03, i32 1, double 4.801500e+03, double 2.972500e+03, double 4.795300e+03, double 2.978900e+03, double 4.787700e+03, double 2.978900e+03, i32 1, double 4.780000e+03, double 2.978900e+03, double 4.773800e+03, double 2.972500e+03, double 4.773800e+03, double 2.964500e+03, i32 1, double 4.773800e+03, double 2.956500e+03, double 4.780000e+03, double 2.950000e+03, double 4.787700e+03, double 2.950000e+03, i32 1, double 4.795300e+03, double 2.950000e+03, double 4.801500e+03, double 2.956500e+03, double 4.801500e+03, double 2.964500e+03, i32 1, double 4.802400e+03, double 3.010200e+03, double 4.796200e+03, double 3.016600e+03, double 4.788500e+03, double 3.016600e+03, i32 1, double 4.780900e+03, double 3.016600e+03, double 4.774700e+03, double 3.010200e+03, 
double 4.774700e+03, double 3.002200e+03, i32 1, double 4.774700e+03, double 2.994200e+03, double 4.780900e+03, double 2.987700e+03, double 4.788500e+03, double 2.987700e+03, i32 1, double 4.796200e+03, double 2.987700e+03, double 4.802400e+03, double 2.994200e+03, double 4.802400e+03, double 3.002200e+03, i32 1, double 4.802400e+03, double 3.039400e+03, double 4.796200e+03, double 3.455800e+03, double 4.788500e+03, double 3.455800e+03, i32 1, double 4.780900e+03, double 3.455800e+03, double 4.774700e+03, double 3.039400e+03, double 4.774700e+03, double 3.031400e+03, i32 1, double 4.774700e+03, double 3.023400e+03, double 4.780900e+03, double 3.016900e+03, double 4.788500e+03, double 3.016900e+03, i32 1, double 4.796200e+03, double 3.016900e+03, double 4.802400e+03, double 3.023400e+03, double 4.802400e+03, double 3.031400e+03, i32 1, double 4.778600e+03, double 3.063100e+03, double 4.772400e+03, double 3.069600e+03, double 4.764700e+03, double 3.069600e+03, i32 1, double 4.757100e+03, double 3.069600e+03, double 4.750900e+03, double 3.063100e+03, double 4.750900e+03, double 3.055100e+03, i32 1, double 4.750900e+03, double 3.457100e+03, double 4.757100e+03, double 3.450700e+03, double 4.764700e+03, double 3.450700e+03, i32 1, double 4.772400e+03, double 3.450700e+03, double 4.778600e+03, double 3.457100e+03, double 4.778600e+03, double 3.055100e+03, i32 1, double 4.748600e+03, double 3.063600e+03, double 4.742400e+03, double 3.070000e+03, double 4.734700e+03, double 3.070000e+03, i32 1, double 4.727100e+03, double 3.070000e+03, double 4.720900e+03, double 3.063600e+03, double 4.720900e+03, double 3.055600e+03, i32 1, double 4.720900e+03, double 3.457600e+03, double 4.727100e+03, double 3.451100e+03, double 4.734700e+03, double 3.451100e+03, i32 1, double 4.742400e+03, double 3.451100e+03, double 4.748600e+03, double 3.457600e+03, double 4.748600e+03, double 3.055600e+03, i32 1, double 4.719500e+03, double 3.063600e+03, double 4.713300e+03, double 3.070000e+03, double 4.705700e+03, double 3.070000e+03, i32 1, double 4.698000e+03, double 3.070000e+03, double 4.691900e+03, double 3.063600e+03, double 4.691900e+03, double 3.055600e+03, i32 1, double 4.691900e+03, double 3.457600e+03, double 4.698000e+03, double 3.451100e+03, double 4.705700e+03, double 3.451100e+03, i32 1, double 4.713300e+03, double 3.451100e+03, double 4.719500e+03, double 3.457600e+03, double 4.719500e+03, double 3.055600e+03, i32 1, double 4.691300e+03, double 3.064000e+03, double 4.685100e+03, double 3.070500e+03, double 4.677500e+03, double 3.070500e+03, i32 1, double 4.669900e+03, double 3.070500e+03, double 4.663700e+03, double 3.064000e+03, double 4.663700e+03, double 3.056000e+03, i32 1, double 4.663700e+03, double 3.458000e+03, double 4.669900e+03, double 3.451600e+03, double 4.677500e+03, double 3.451600e+03, i32 1, double 4.685100e+03, double 3.451600e+03, double 4.691300e+03, double 3.458000e+03, double 4.691300e+03, double 3.056000e+03, i32 1, double 4.668500e+03, double 3.453000e+03, double 4.662300e+03, double 3.459400e+03, double 4.654700e+03, double 3.459400e+03, i32 1, double 4.647000e+03, double 3.459400e+03, double 4.640900e+03, double 3.453000e+03, double 4.640900e+03, double 3.035000e+03, i32 1, double 4.640900e+03, double 3.027000e+03, double 4.647000e+03, double 3.020500e+03, double 4.654700e+03, double 3.020500e+03, i32 1, double 4.662300e+03, double 3.020500e+03, double 4.668500e+03, double 3.027000e+03, double 4.668500e+03, double 3.035000e+03, i32 1, double 4.668500e+03, double 3.014300e+03, 
double 4.662300e+03, double 3.020800e+03, double 4.654700e+03, double 3.020800e+03, i32 1, double 4.647000e+03, double 3.020800e+03, double 4.640900e+03, double 3.014300e+03, double 4.640900e+03, double 3.006400e+03, i32 1, double 4.640900e+03, double 2.998400e+03, double 4.647000e+03, double 2.991900e+03, double 4.654700e+03, double 2.991900e+03, i32 1, double 4.662300e+03, double 2.991900e+03, double 4.668500e+03, double 2.998400e+03, double 4.668500e+03, double 3.006400e+03, i32 1, double 4.668100e+03, double 2.941100e+03, double 4.661900e+03, double 2.947600e+03, double 4.654200e+03, double 2.947600e+03, i32 1, double 4.646600e+03, double 2.947600e+03, double 4.640400e+03, double 2.941100e+03, double 4.640400e+03, double 2.933100e+03, i32 1, double 4.640400e+03, double 2.925200e+03, double 4.646600e+03, double 2.918700e+03, double 4.654200e+03, double 2.918700e+03, i32 1, double 4.661900e+03, double 2.918700e+03, double 4.668100e+03, double 2.925200e+03, double 4.668100e+03, double 2.933100e+03, i32 1, double 4.668500e+03, double 2.971600e+03, double 4.662300e+03, double 2.978100e+03, double 4.654700e+03, double 2.978100e+03, i32 1, double 4.647000e+03, double 2.978100e+03, double 4.640900e+03, double 2.971600e+03, double 4.640900e+03, double 2.963600e+03, i32 1, double 4.640900e+03, double 2.955700e+03, double 4.647000e+03, double 2.949200e+03, double 4.654700e+03, double 2.949200e+03, i32 1, double 4.662300e+03, double 2.949200e+03, double 4.668500e+03, double 2.955700e+03, double 4.668500e+03, double 2.963600e+03, i32 2, i32 1, double 4.691300e+03, double 3.056000e+03, i32 2, i32 1, double 4.748600e+03, double 3.055600e+03, i32 2, i32 1, double 4.778200e+03, double 2.912100e+03, i32 2, i32 1, double 4.749200e+03, double 2.912100e+03, i32 2, i32 1, double 4.802400e+03, double 3.031400e+03, i32 2, i32 1, double 4.778600e+03, double 3.055100e+03, i32 2, i32 1, double 4.801500e+03, double 2.964500e+03, i32 2, i32 1, double 4.802400e+03, double 3.002200e+03, i32 2, i32 1, double 4.719700e+03, double 2.912500e+03, i32 2, i32 1, double 4.801900e+03, double 2.934100e+03, i32 2, i32 1, double 4.719500e+03, double 3.055600e+03, i32 2, i32 1, double 4.668500e+03, double 3.006400e+03, i32 2, i32 1, double 4.668500e+03, double 3.035000e+03, i32 2, i32 1, double 4.668100e+03, double 2.933100e+03, i32 2, i32 1, double 4.668500e+03, double 2.963600e+03, i32 2, i32 48)
ret i32 0
}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index 91efc5d..1d2ba00 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -15,7 +15,7 @@ entry:
br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
cond_true: ; preds = %entry
- %tmp10 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp10 = call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
diff --git a/test/CodeGen/Thumb2/thumb2-tbb.ll b/test/CodeGen/Thumb2/thumb2-tbb.ll
index d57638b..758f792 100644
--- a/test/CodeGen/Thumb2/thumb2-tbb.ll
+++ b/test/CodeGen/Thumb2/thumb2-tbb.ll
@@ -11,43 +11,43 @@ entry:
switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
bb:
- tail call void(...)* @foo1()
+ tail call void(...) @foo1()
ret void
bb1:
- tail call void(...)* @foo2()
+ tail call void(...) @foo2()
ret void
bb2:
- tail call void(...)* @foo6()
+ tail call void(...) @foo6()
ret void
bb3:
- tail call void(...)* @foo3()
+ tail call void(...) @foo3()
ret void
bb4:
- tail call void(...)* @foo4()
+ tail call void(...) @foo4()
ret void
bb5:
- tail call void(...)* @foo5()
+ tail call void(...) @foo5()
ret void
bb6:
- tail call void(...)* @foo1()
+ tail call void(...) @foo1()
ret void
bb7:
- tail call void(...)* @foo2()
+ tail call void(...) @foo2()
ret void
bb8:
- tail call void(...)* @foo6()
+ tail call void(...) @foo6()
ret void
bb9:
- tail call void(...)* @foo3()
+ tail call void(...) @foo3()
ret void
bb10:
- tail call void(...)* @foo4()
+ tail call void(...) @foo4()
ret void
bb11:
- tail call void(...)* @foo5()
+ tail call void(...) @foo5()
ret void
bb12:
- tail call void(...)* @foo6()
+ tail call void(...) @foo6()
ret void
}
diff --git a/test/CodeGen/WinEH/cppeh-alloca-sink.ll b/test/CodeGen/WinEH/cppeh-alloca-sink.ll
new file mode 100644
index 0000000..d50237f
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-alloca-sink.ll
@@ -0,0 +1,180 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test describes two difficult cases in sinking allocas into child frames.
+; We don't currently do this optimization, but we'll need to tweak these tests
+; when we do.
+
+; This test is based on the following code:
+;
+; // In this case we can sink the alloca from the parent into the catch because
+; // the lifetime is limited to the catch.
+; extern "C" void may_throw();
+; extern "C" void sink_alloca_to_catch() {
+; try {
+; may_throw();
+; } catch (int) {
+; volatile int only_used_in_catch = 42;
+; }
+; }
+;
+; // In this case we cannot. The variable should live as long as the parent
+; // frame lives.
+; extern "C" void use_catch_var(int *);
+; extern "C" void dont_sink_alloca_to_catch(int n) {
+; int live_in_out_catch = 0;
+; while (n > 0) {
+; try {
+; may_throw();
+; } catch (int) {
+; use_catch_var(&live_in_out_catch);
+; }
+; n--;
+; }
+; }
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+declare void @may_throw() #1
+declare i32 @__CxxFrameHandler3(...)
+declare i32 @llvm.eh.typeid.for(i8*) #2
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+declare void @llvm.eh.endcatch() #3
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchHandlerType = type { i32, i8* }
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
+
+; Function Attrs: uwtable
+define void @sink_alloca_to_catch() #0 {
+entry:
+ %0 = alloca i32
+ %only_used_in_catch = alloca i32, align 4
+ invoke void @may_throw()
+ to label %try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ %2 = extractvalue { i8*, i32 } %1, 1
+ %3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #3
+ %matches = icmp eq i32 %2, %3
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %lpad
+ %4 = extractvalue { i8*, i32 } %1, 0
+ call void @llvm.eh.begincatch(i8* %4, i8* null) #3
+ store volatile i32 42, i32* %only_used_in_catch, align 4
+ tail call void @llvm.eh.endcatch() #3
+ br label %try.cont
+
+try.cont: ; preds = %entry, %catch
+ ret void
+
+eh.resume: ; preds = %lpad
+ resume { i8*, i32 } %1
+}
+
+; CHECK-LABEL: define void @sink_alloca_to_catch()
+; CHECK: call void (...) @llvm.frameescape(i32* %only_used_in_catch)
+
+declare void @use_catch_var(i32*) #1
+
+; Function Attrs: uwtable
+define void @dont_sink_alloca_to_catch(i32 %n) #0 {
+entry:
+ %0 = alloca i32
+ %n.addr = alloca i32, align 4
+ %live_in_out_catch = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 %n, i32* %n.addr, align 4
+ br label %while.cond
+
+while.cond: ; preds = %try.cont, %entry
+ %1 = load i32, i32* %n.addr, align 4
+ %cmp = icmp sgt i32 %1, 0
+ br i1 %cmp, label %while.body, label %while.end
+
+while.body: ; preds = %while.cond
+ invoke void @may_throw()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %while.body
+ br label %try.cont
+
+lpad: ; preds = %while.body
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
+ %3 = extractvalue { i8*, i32 } %2, 0
+ store i8* %3, i8** %exn.slot
+ %4 = extractvalue { i8*, i32 } %2, 1
+ store i32 %4, i32* %ehselector.slot
+ br label %catch.dispatch
+
+catch.dispatch: ; preds = %lpad
+ %sel = load i32, i32* %ehselector.slot
+ %5 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #3
+ %matches = icmp eq i32 %sel, %5
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %catch.dispatch
+ %exn = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn, i8* null) #3
+ invoke void @use_catch_var(i32* %live_in_out_catch)
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %catch
+ call void @llvm.eh.endcatch() #3
+ br label %try.cont
+
+try.cont: ; preds = %invoke.cont2, %invoke.cont
+ %6 = load i32, i32* %0
+ %7 = load i32, i32* %n.addr, align 4
+ %dec = add nsw i32 %7, -1
+ store i32 %dec, i32* %n.addr, align 4
+ br label %while.cond
+
+lpad1: ; preds = %catch
+ %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %9 = extractvalue { i8*, i32 } %8, 0
+ store i8* %9, i8** %exn.slot
+ %10 = extractvalue { i8*, i32 } %8, 1
+ store i32 %10, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #3
+ br label %eh.resume
+
+while.end: ; preds = %while.cond
+ ret void
+
+eh.resume: ; preds = %lpad1, %catch.dispatch
+ %exn3 = load i8*, i8** %exn.slot
+ %sel4 = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn3, 0
+ %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel4, 1
+ resume { i8*, i32 } %lpad.val5
+}
+
+; CHECK-LABEL: define void @dont_sink_alloca_to_catch(i32 %n)
+; CHECK: call void (...) @llvm.frameescape(i32* %live_in_out_catch)
+
+; CHECK-LABEL: define internal i8* @sink_alloca_to_catch.catch(i8*, i8*)
+; CHECK: %only_used_in_catch.i8 = call i8* @llvm.framerecover({{.*}}, i32 0)
+; CHECK: %only_used_in_catch = bitcast
+
+; CHECK-LABEL: define internal i8* @dont_sink_alloca_to_catch.catch(i8*, i8*)
+; CHECK: %live_in_out_catch.i8 = call i8* @llvm.framerecover({{.*}}, i32 0)
+; CHECK: %live_in_out_catch = bitcast
+
+
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
diff --git a/test/CodeGen/WinEH/cppeh-catch-all.ll b/test/CodeGen/WinEH/cppeh-catch-all.ll
index 6e69862..4d017fd 100644
--- a/test/CodeGen/WinEH/cppeh-catch-all.ll
+++ b/test/CodeGen/WinEH/cppeh-catch-all.ll
@@ -19,7 +19,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc"
; The function entry in this case remains unchanged.
-; CHECK: define void @_Z4testv() #0 {
+; CHECK: define void @_Z4testv()
; CHECK: entry:
; CHECK: invoke void @_Z9may_throwv()
; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]+]]
@@ -38,7 +38,7 @@ invoke.cont: ; preds = %entry
; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK-NEXT: catch i8* null
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...)* @llvm.eh.actions(i32 1, i8* null, i8* null, i8* (i8*, i8*)* @_Z4testv.catch)
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @_Z4testv.catch)
; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont]
lpad: ; preds = %entry
@@ -70,7 +70,7 @@ try.cont: ; preds = %invoke.cont2, %invo
; CHECK: }
}
-; CHECK: define internal i8* @_Z4testv.catch(i8*, i8*) {
+; CHECK: define internal i8* @_Z4testv.catch(i8*, i8*)
; CHECK: entry:
; CHECK: call void @_Z16handle_exceptionv()
; CHECK: ret i8* blockaddress(@_Z4testv, %try.cont)
diff --git a/test/CodeGen/WinEH/cppeh-catch-scalar.ll b/test/CodeGen/WinEH/cppeh-catch-scalar.ll
index 0f8a7a8..4ea3838 100644
--- a/test/CodeGen/WinEH/cppeh-catch-scalar.ll
+++ b/test/CodeGen/WinEH/cppeh-catch-scalar.ll
@@ -21,10 +21,10 @@ target triple = "x86_64-pc-windows-msvc"
@_ZTIi = external constant i8*
; The function entry will be rewritten like this.
-; CHECK: define void @_Z4testv() #0 {
+; CHECK: define void @_Z4testv()
; CHECK: entry:
; CHECK: [[I_PTR:\%.+]] = alloca i32, align 4
-; CHECK: call void (...)* @llvm.frameescape(i32* [[I_PTR]])
+; CHECK: call void (...) @llvm.frameescape(i32* [[I_PTR]])
; CHECK: invoke void @_Z9may_throwv()
; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]+]]
@@ -43,7 +43,7 @@ invoke.cont: ; preds = %entry
; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...)* @llvm.eh.actions(i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32* %i, i8* (i8*, i8*)* @_Z4testv.catch)
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont]
lpad: ; preds = %entry
@@ -94,7 +94,7 @@ eh.resume: ; preds = %catch.dispatch
; CHECK: }
}
-; CHECK: define internal i8* @_Z4testv.catch(i8*, i8*) {
+; CHECK: define internal i8* @_Z4testv.catch(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 0)
; CHECK: [[I_PTR1:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
diff --git a/test/CodeGen/WinEH/cppeh-catch-unwind.ll b/test/CodeGen/WinEH/cppeh-catch-unwind.ll
index 3db1635..37a5429 100644
--- a/test/CodeGen/WinEH/cppeh-catch-unwind.ll
+++ b/test/CodeGen/WinEH/cppeh-catch-unwind.ll
@@ -33,13 +33,10 @@ $"\01??_R0H@8" = comdat any
; CHECK-LABEL: define void @"\01?test@@YAXXZ"() #0 {
; CHECK: entry:
-; CHECK: [[UNWIND_HELP:\%.+]] = alloca i64
; CHECK: [[OBJ_PTR:\%.+]] = alloca %class.SomeClass
; CHECK: [[TMP0:\%.+]] = alloca i32, align 4
; CHECK: [[TMP1:\%.+]] = alloca i32, align 4
-; CHECK: call void (...)* @llvm.frameescape(i32* [[TMP1]], %class.SomeClass* [[OBJ_PTR]], i32* [[TMP0]])
-; CHECK: [[UNWIND_HELP_i8:\%.+]] = bitcast i64* [[UNWIND_HELP]] to i8*
-; CHECK: call void @llvm.eh.unwindhelp(i8* [[UNWIND_HELP_i8]])
+; CHECK: call void (...) @llvm.frameescape(i32* [[TMP1]], %class.SomeClass* [[OBJ_PTR]], i32* [[TMP0]])
; CHECK: %call = invoke %class.SomeClass* @"\01??0SomeClass@@QEAA@XZ"(%class.SomeClass* %obj)
; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]+]]
@@ -71,7 +68,7 @@ invoke.cont2: ; preds = %invoke.cont
; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
; CHECK: [[LPAD_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...)* @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32* [[TMP1]], i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont15]
lpad: ; preds = %entry
@@ -85,7 +82,7 @@ lpad: ; preds = %entry
; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK-NEXT: [[RECOVER1:\%.+]] = call i8* (...)* @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test@@YAXXZ.cleanup", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32* [[TMP1]], i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
+; CHECK-NEXT: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test@@YAXXZ.cleanup", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
; CHECK-NEXT: indirectbr i8* [[RECOVER1]], [label %try.cont15]
lpad1: ; preds = %invoke.cont
@@ -100,7 +97,7 @@ lpad1: ; preds = %invoke.cont
; CHECK: [[LPAD3_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK-NEXT: [[RECOVER3:\%.+]] = call i8* (...)* @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32* [[TMP0]], i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1", i32 0, void (i8*, i8*)* @"\01?test@@YAXXZ.cleanup")
+; CHECK-NEXT: [[RECOVER3:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1", i32 0, void (i8*, i8*)* @"\01?test@@YAXXZ.cleanup")
; CHECK-NEXT: indirectbr i8* [[RECOVER3]], [label %try.cont]
lpad3: ; preds = %invoke.cont2
@@ -178,7 +175,7 @@ eh.resume: ; preds = %catch.dispatch7
; CHECK: }
}
-; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*) {
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_TMP1:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
; CHECK: [[TMP1_PTR:\%.+]] = bitcast i8* [[RECOVER_TMP1]] to i32*
@@ -186,15 +183,15 @@ eh.resume: ; preds = %catch.dispatch7
; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont15)
; CHECK: }
-; CHECK-LABEL: define internal void @"\01?test@@YAXXZ.cleanup"(i8*, i8*) {
+; CHECK-LABEL: define internal void @"\01?test@@YAXXZ.cleanup"(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_OBJ:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
; CHECK: [[OBJ_PTR:\%.+]] = bitcast i8* %obj.i8 to %class.SomeClass*
-; CHECK: call void @"\01??1SomeClass@@QEAA@XZ"(%class.SomeClass* [[OBJ_PTR]]) #3
+; CHECK: call void @"\01??1SomeClass@@QEAA@XZ"(%class.SomeClass* [[OBJ_PTR]])
; CHECK: ret void
; CHECK: }
-; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch1"(i8*, i8*) {
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch1"(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_TMP0:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
; CHECK: [[TMP0_PTR:\%.+]] = bitcast i8* [[RECOVER_TMP0]] to i32*
@@ -208,7 +205,6 @@ eh.resume: ; preds = %catch.dispatch7
; CHECK: [[LPAD5_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK: cleanup
; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK: unreachable
; CHECK: }
declare %class.SomeClass* @"\01??0SomeClass@@QEAA@XZ"(%class.SomeClass* returned) #1
diff --git a/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll b/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll
new file mode 100644
index 0000000..5a57043
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll
@@ -0,0 +1,91 @@
+; RUN: opt -winehprepare -S < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; This test is based on the following code, with modifications:
+; struct HasDtor {
+; ~HasDtor();
+; };
+; extern "C" void may_throw();
+; int main() {
+; try {
+; HasDtor o;
+; may_throw();
+; } catch (int) {
+; }
+; }
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchHandlerType = type { i32, i8* }
+%struct.HasDtor = type { i8 }
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
+
+define i32 @main() {
+entry:
+ %o = alloca %struct.HasDtor, align 1
+ invoke void @may_throw()
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:                                     ; preds = %entry
+ call void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor* %o)
+ br label %try.cont
+
+lpad:                                             ; preds = %lpad1
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = extractvalue { i8*, i32 } %0, 1
+ br label %catch.dispatch
+
+lpad1:                                            ; preds = %entry
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ %4 = extractvalue { i8*, i32 } %3, 0
+ %5 = extractvalue { i8*, i32 } %3, 1
+ invoke void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor* %o)
+ to label %catch.dispatch unwind label %lpad
+
+catch.dispatch: ; preds = %lpad1, %lpad
+ %exn.slot.0 = phi i8* [ %4, %lpad1 ], [ %1, %lpad ]
+ %ehselector.slot.0 = phi i32 [ %5, %lpad1 ], [ %2, %lpad ]
+ %6 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*))
+ %matches = icmp eq i32 %ehselector.slot.0, %6
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %catch.dispatch
+ call void @llvm.eh.begincatch(i8* %exn.slot.0, i8* null)
+ call void @llvm.eh.endcatch()
+ br label %try.cont
+
+try.cont: ; preds = %catch, %invoke.cont2
+ ret i32 0
+
+eh.resume: ; preds = %catch.dispatch
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
+ %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
+ resume { i8*, i32 } %lpad.val5
+}
+
+; CHECK-LABEL: define i32 @main()
+; CHECK: @llvm.eh.actions(i32 0, void (i8*, i8*)* @main.cleanup, i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 -1, i8* (i8*, i8*)* @main.catch)
+
+; CHECK-LABEL: define internal void @main.cleanup(i8*, i8*)
+; CHECK: call void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor* %{{.*}})
+; CHECK: ret void
+
+declare void @may_throw()
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor*)
+
+declare i32 @llvm.eh.typeid.for(i8*)
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture)
+declare void @llvm.eh.endcatch()
diff --git a/test/CodeGen/WinEH/cppeh-frame-vars.ll b/test/CodeGen/WinEH/cppeh-frame-vars.ll
index dc5ed1c..773dc94 100644
--- a/test/CodeGen/WinEH/cppeh-frame-vars.ll
+++ b/test/CodeGen/WinEH/cppeh-frame-vars.ll
@@ -47,7 +47,7 @@ $"\01??_R0H@8" = comdat any
@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
; The function entry should be rewritten like this.
-; CHECK: define void @"\01?test@@YAXXZ"() #0 {
+; CHECK: define void @"\01?test@@YAXXZ"()
; CHECK: entry:
; CHECK: [[NUMEXCEPTIONS_PTR:\%.+]] = alloca i32, align 4
; CHECK: [[EXCEPTIONVAL_PTR:\%.+]] = alloca [10 x i32], align 16
@@ -58,7 +58,7 @@ $"\01??_R0H@8" = comdat any
; CHECK: [[TMP:\%.+]] = bitcast %struct.SomeData* [[DATA_PTR]] to i8*
; CHECK: call void @llvm.memset(i8* [[TMP]], i8 0, i64 8, i32 4, i1 false)
; CHECK: store i32 0, i32* [[I_PTR]], align 4
-; CHECK: call void (...)* @llvm.frameescape(i32* [[E_PTR]], i32* [[NUMEXCEPTIONS_PTR]], [10 x i32]* [[EXCEPTIONVAL_PTR]], i32* [[I_PTR]], %struct.SomeData* [[DATA_PTR]])
+; CHECK: call void (...) @llvm.frameescape(i32* [[E_PTR]], i32* [[NUMEXCEPTIONS_PTR]], [10 x i32]* [[EXCEPTIONVAL_PTR]], i32* [[I_PTR]], %struct.SomeData* [[DATA_PTR]])
; CHECK: br label %for.cond
; Function Attrs: uwtable
@@ -101,7 +101,7 @@ invoke.cont: ; preds = %for.body
; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %for.body
; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...)* @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32* %e, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont]
lpad: ; preds = %for.body
@@ -196,7 +196,7 @@ eh.resume: ; preds = %catch.dispatch
}
; The following catch handler should be outlined.
-; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*) {
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_E:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
; CHECK: [[E_PTR1:\%.+]] = bitcast i8* [[RECOVER_E]] to i32*
diff --git a/test/CodeGen/WinEH/cppeh-inalloca.ll b/test/CodeGen/WinEH/cppeh-inalloca.ll
index f3f3631..93ec600 100644
--- a/test/CodeGen/WinEH/cppeh-inalloca.ll
+++ b/test/CodeGen/WinEH/cppeh-inalloca.ll
@@ -36,7 +36,7 @@ $"\01??_R0H@8" = comdat any
@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
; The function entry should be rewritten like this.
-; CHECK: define i32 @"\01?test@@YAHUA@@@Z"(<{ %struct.A }>* inalloca) #0 {
+; CHECK: define i32 @"\01?test@@YAHUA@@@Z"(<{ %struct.A }>* inalloca)
; CHECK: entry:
; CHECK: [[TMP_REGMEM:\%.+]] = alloca <{ %struct.A }>*
; CHECK: [[TMP:\%.+]] = select i1 true, <{ %struct.A }>* %0, <{ %struct.A }>* undef
@@ -44,7 +44,7 @@ $"\01??_R0H@8" = comdat any
; CHECK: [[RETVAL:\%.+]] = alloca i32, align 4
; CHECK: [[E_PTR:\%.+]] = alloca i32, align 4
; CHECK: [[CLEANUP_SLOT:\%.+]] = alloca i32
-; CHECK: call void (...)* @llvm.frameescape(i32* %e, <{ %struct.A }>** [[TMP_REGMEM]], i32* [[RETVAL]], i32* [[CLEANUP_SLOT]])
+; CHECK: call void (...) @llvm.frameescape(i32* %e, <{ %struct.A }>** [[TMP_REGMEM]], i32* [[RETVAL]], i32* [[CLEANUP_SLOT]])
; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
@@ -65,7 +65,7 @@ invoke.cont: ; preds = %entry
; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK-NEXT: [[RECOVER:\%recover.*]] = call i8* (...)* @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32* %e, i8* (i8*, i8*)* @"\01?test@@YAHUA@@@Z.catch", i32 0, void (i8*, i8*)* @"\01?test@@YAHUA@@@Z.cleanup")
+; CHECK-NEXT: [[RECOVER:\%recover.*]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAHUA@@@Z.catch", i32 0, void (i8*, i8*)* @"\01?test@@YAHUA@@@Z.cleanup")
; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %cleanup]
lpad: ; preds = %entry
@@ -142,7 +142,7 @@ eh.resume: ; preds = %ehcleanup
}
; The following catch handler should be outlined.
-; CHECK: define internal i8* @"\01?test@@YAHUA@@@Z.catch"(i8*, i8*) {
+; CHECK: define internal i8* @"\01?test@@YAHUA@@@Z.catch"(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_E:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (i32 (<{ %struct.A }>*)* @"\01?test@@YAHUA@@@Z" to i8*), i8* %1, i32 0)
; CHECK: [[E_PTR:\%.+]] = bitcast i8* [[RECOVER_E]] to i32*
@@ -165,7 +165,7 @@ eh.resume: ; preds = %ehcleanup
; CHECK: }
; The following cleanup handler should be outlined.
-; CHECK: define internal void @"\01?test@@YAHUA@@@Z.cleanup"(i8*, i8*) {
+; CHECK: define internal void @"\01?test@@YAHUA@@@Z.cleanup"(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_EH_TEMP1:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (i32 (<{ %struct.A }>*)* @"\01?test@@YAHUA@@@Z" to i8*), i8* %1, i32 1)
; CHECK: [[EH_TEMP1:\%.+]] = bitcast i8* [[RECOVER_EH_TEMP]] to <{ %struct.A }>**
diff --git a/test/CodeGen/WinEH/cppeh-min-unwind.ll b/test/CodeGen/WinEH/cppeh-min-unwind.ll
index a1e97d5..7868b33 100644
--- a/test/CodeGen/WinEH/cppeh-min-unwind.ll
+++ b/test/CodeGen/WinEH/cppeh-min-unwind.ll
@@ -21,11 +21,11 @@ target triple = "x86_64-pc-windows-msvc"
%class.SomeClass = type { [28 x i32] }
; The function entry should be rewritten like this.
-; CHECK: define void @_Z4testv() #0 {
+; CHECK: define void @_Z4testv()
; CHECK: entry:
; CHECK: [[OBJ_PTR:\%.+]] = alloca %class.SomeClass, align 4
; CHECK: call void @_ZN9SomeClassC1Ev(%class.SomeClass* [[OBJ_PTR]])
-; CHECK: call void (...)* @llvm.frameescape(%class.SomeClass* [[OBJ_PTR]])
+; CHECK: call void (...) @llvm.frameescape(%class.SomeClass* [[OBJ_PTR]])
; CHECK: invoke void @_Z9may_throwv()
; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]+]]
@@ -46,7 +46,7 @@ invoke.cont: ; preds = %entry
; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK-NEXT: cleanup
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...)* @llvm.eh.actions(i32 0, void (i8*, i8*)* @_Z4testv.cleanup)
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @_Z4testv.cleanup)
; CHECK-NEXT: indirectbr i8* [[RECOVER]], []
lpad: ; preds = %entry
@@ -72,7 +72,7 @@ eh.resume: ; preds = %lpad
}
; This cleanup handler should be outlined.
-; CHECK: define internal void @_Z4testv.cleanup(i8*, i8*) {
+; CHECK: define internal void @_Z4testv.cleanup(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_OBJ:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 0)
; CHECK: [[OBJ_PTR1:\%.+]] = bitcast i8* [[RECOVER_OBJ]] to %class.SomeClass*
diff --git a/test/CodeGen/WinEH/cppeh-multi-catch.ll b/test/CodeGen/WinEH/cppeh-multi-catch.ll
new file mode 100644
index 0000000..6052629
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-multi-catch.ll
@@ -0,0 +1,229 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following code:
+;
+; void test()
+; {
+; try {
+; may_throw();
+; } catch (int i) {
+; handle_int(i);
+; } catch (long long ll) {
+; handle_long_long(ll);
+; } catch (SomeClass &obj) {
+; handle_obj(&obj);
+; } catch (...) {
+; handle_exception();
+; }
+; }
+;
+; The catch handlers were edited to insert 'ret void' after the endcatch call.
+
+; ModuleID = 'catch-with-type.cpp'
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.HandlerMapEntry = type { i32, i32 }
+%rtti.TypeDescriptor3 = type { i8**, i8*, [4 x i8] }
+%rtti.TypeDescriptor15 = type { i8**, i8*, [16 x i8] }
+%class.SomeClass = type { i8 }
+
+$"\01??_R0H@8" = comdat any
+
+$"\01??_R0_J@8" = comdat any
+
+$"\01??_R0?AVSomeClass@@@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@__ImageBase = external constant i8
+@llvm.eh.handlermapentry.H = private unnamed_addr constant %eh.HandlerMapEntry { i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section "llvm.metadata"
+@"\01??_R0_J@8" = linkonce_odr global %rtti.TypeDescriptor3 { i8** @"\01??_7type_info@@6B@", i8* null, [4 x i8] c"._J\00" }, comdat
+@llvm.eh.handlermapentry._J = private unnamed_addr constant %eh.HandlerMapEntry { i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor3* @"\01??_R0_J@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section "llvm.metadata"
+@"\01??_R0?AVSomeClass@@@8" = linkonce_odr global %rtti.TypeDescriptor15 { i8** @"\01??_7type_info@@6B@", i8* null, [16 x i8] c".?AVSomeClass@@\00" }, comdat
+@"llvm.eh.handlermapentry.reference.?AVSomeClass@@" = private unnamed_addr constant %eh.HandlerMapEntry { i32 8, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor15* @"\01??_R0?AVSomeClass@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section "llvm.metadata"
+
+
+; CHECK: define void @"\01?test@@YAXXZ"() #0 {
+; CHECK: entry:
+; CHECK: [[OBJ_PTR:\%.+]] = alloca %class.SomeClass*, align 8
+; CHECK: [[LL_PTR:\%.+]] = alloca i64, align 8
+; CHECK: [[I_PTR:\%.+]] = alloca i32, align 4
+; CHECK: call void (...) @llvm.frameescape(i32* [[I_PTR]], i64* [[LL_PTR]], %class.SomeClass** [[OBJ_PTR]])
+; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
+; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]+]]
+
+; Function Attrs: uwtable
+define void @"\01?test@@YAXXZ"() #0 {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %obj = alloca %class.SomeClass*, align 8
+ %ll = alloca i64, align 8
+ %i = alloca i32, align 4
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ br label %try.cont
+
+; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry.H
+; CHECK-NEXT: catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry._J
+; CHECK-NEXT: catch %eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@"
+; CHECK-NEXT: catch i8* null
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(
+; CHECK-SAME: i32 1, i8* bitcast (%eh.HandlerMapEntry* @llvm.eh.handlermapentry.H to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch",
+; CHECK-SAME: i32 1, i8* bitcast (%eh.HandlerMapEntry* @llvm.eh.handlermapentry._J to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1",
+; CHECK-SAME: i32 1, i8* bitcast (%eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch2",
+; CHECK-SAME: i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch3")
+; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %catch14.split, label %catch10.split, label %catch6.split, label %catch.split]
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry.H
+ catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry._J
+ catch %eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@"
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ store i8* %1, i8** %exn.slot
+ %2 = extractvalue { i8*, i32 } %0, 1
+ store i32 %2, i32* %ehselector.slot
+ br label %catch.dispatch
+
+; CHECK-NOT: catch.dispatch:
+catch.dispatch: ; preds = %lpad
+ %sel = load i32, i32* %ehselector.slot
+ %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.HandlerMapEntry* @llvm.eh.handlermapentry.H to i8*)) #3
+ %matches = icmp eq i32 %sel, %3
+ br i1 %matches, label %catch14, label %catch.fallthrough
+
+; CHECK-NOT: catch14:
+; CHECK: catch14.split:
+; CHECK-NEXT: ret void
+catch14: ; preds = %catch.dispatch
+ %exn15 = load i8*, i8** %exn.slot
+ %4 = bitcast i32* %i to i8*
+ call void @llvm.eh.begincatch(i8* %exn15, i8* %4) #3
+ %5 = load i32, i32* %i, align 4
+ call void @"\01?handle_int@@YAXH@Z"(i32 %5)
+ call void @llvm.eh.endcatch() #3
+ ret void
+
+try.cont: ; preds = %invoke.cont
+ ret void
+
+; CHECK-NOT: catch.fallthrough:
+catch.fallthrough: ; preds = %catch.dispatch
+ %6 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.HandlerMapEntry* @llvm.eh.handlermapentry._J to i8*)) #3
+ %matches1 = icmp eq i32 %sel, %6
+ br i1 %matches1, label %catch10, label %catch.fallthrough2
+
+; CHECK-NOT: catch10:
+; CHECK: catch10.split:
+; CHECK-NEXT: ret void
+catch10: ; preds = %catch.fallthrough
+ %exn11 = load i8*, i8** %exn.slot
+ %7 = bitcast i64* %ll to i8*
+ call void @llvm.eh.begincatch(i8* %exn11, i8* %7) #3
+ %8 = load i64, i64* %ll, align 8
+ call void @"\01?handle_long_long@@YAX_J@Z"(i64 %8)
+ call void @llvm.eh.endcatch() #3
+ ret void
+
+; CHECK-NOT: catch.fallthrough2:
+catch.fallthrough2: ; preds = %catch.fallthrough
+ %9 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@" to i8*)) #3
+ %matches3 = icmp eq i32 %sel, %9
+ br i1 %matches3, label %catch6, label %catch
+
+; CHECK-NOT: catch6:
+; CHECK: catch6.split:
+; CHECK-NEXT: ret void
+catch6: ; preds = %catch.fallthrough2
+ %exn7 = load i8*, i8** %exn.slot
+ %10 = bitcast %class.SomeClass** %obj to i8*
+ call void @llvm.eh.begincatch(i8* %exn7, i8* %10) #3
+ %11 = load %class.SomeClass*, %class.SomeClass** %obj, align 8
+ call void @"\01?handle_obj@@YAXPEAVSomeClass@@@Z"(%class.SomeClass* %11)
+ call void @llvm.eh.endcatch() #3
+ ret void
+
+; CHECK-NOT: catch:
+; CHECK: catch.split:
+; CHECK-NEXT: ret void
+catch: ; preds = %catch.fallthrough2
+ %exn = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn, i8* null) #3
+ call void @"\01?handle_exception@@YAXXZ"()
+ call void @llvm.eh.endcatch() #3
+ ret void
+; CHECK: }
+}
+
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
+; CHECK: [[I_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
+; CHECK: [[TMP1:\%.+]] = load i32, i32* [[I_PTR]], align 4
+; CHECK: call void @"\01?handle_int@@YAXH@Z"(i32 [[TMP1]])
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %catch14.split)
+; CHECK: }
+
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch1"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_LL:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
+; CHECK: [[LL_PTR:\%.+]] = bitcast i8* [[RECOVER_LL]] to i64*
+; CHECK: [[TMP2:\%.+]] = load i64, i64* [[LL_PTR]], align 8
+; CHECK: call void @"\01?handle_long_long@@YAX_J@Z"(i64 [[TMP2]])
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %catch10.split)
+; CHECK: }
+
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch2"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_OBJ:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
+; CHECK: [[OBJ_PTR:\%.+]] = bitcast i8* [[RECOVER_OBJ]] to %class.SomeClass**
+; CHECK: [[TMP3:\%.+]] = load %class.SomeClass*, %class.SomeClass** [[OBJ_PTR]], align 8
+; CHECK: call void @"\01?handle_obj@@YAXPEAVSomeClass@@@Z"(%class.SomeClass* [[TMP3]])
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %catch6.split)
+; CHECK: }
+
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch3"(i8*, i8*)
+; CHECK: entry:
+; CHECK: call void @"\01?handle_exception@@YAXXZ"()
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %catch.split)
+; CHECK: }
+
+
+declare void @"\01?may_throw@@YAXXZ"() #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+
+declare void @"\01?handle_exception@@YAXXZ"() #1
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #3
+
+declare void @"\01?handle_obj@@YAXPEAVSomeClass@@@Z"(%class.SomeClass*) #1
+
+declare void @"\01?handle_long_long@@YAX_J@Z"(i64) #1
+
+declare void @"\01?handle_int@@YAXH@Z"(i32) #1
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk 233155) (llvm/trunk 233153)"}
diff --git a/test/CodeGen/WinEH/cppeh-nested-1.ll b/test/CodeGen/WinEH/cppeh-nested-1.ll
index e94e771..5cd2b18 100644
--- a/test/CodeGen/WinEH/cppeh-nested-1.ll
+++ b/test/CodeGen/WinEH/cppeh-nested-1.ll
@@ -1,5 +1,4 @@
; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-; XFAIL: *
; This test is based on the following code:
;
@@ -31,11 +30,11 @@ $"\01??_R0H@8" = comdat any
@"\01??_R0M@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".M\00" }, comdat
@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-; CHECK: define void @"\01?test@@YAXXZ"() #0 {
+; CHECK: define void @"\01?test@@YAXXZ"()
; CHECK: entry:
; CHECK: %i = alloca i32, align 4
; CHECK: %f = alloca float, align 4
-; CHECK: call void (...)* @llvm.frameescape(i32* %i, float* %f)
+; CHECK: call void (...) @llvm.frameescape(i32* %i, float* %f)
; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
@@ -56,7 +55,7 @@ invoke.cont: ; preds = %entry
; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
-; CHECK: [[RECOVER:\%.+]] = call i8* (...)* @llvm.eh.actions(i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32* %i, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch" to i8*), i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), float* %f, i8* bitcast (i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1")
+; CHECK: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1")
; CHECK: indirectbr i8* [[RECOVER]], [label %try.cont, label %try.cont10]
lpad: ; preds = %entry
@@ -135,13 +134,10 @@ eh.resume: ; %catch.dispatch3
; CHECK: }
}
-; CHECK: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*) {
+; CHECK: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
; CHECK: [[I_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
-; ------------================= FAIL here =================------------
-; CHECK: [[RECOVER_F:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
-; CHECK: [[F_PTR:\%.+]] = bitcast i8* [[RECOVER_F]] to float*
; CHECK: [[TMP1:\%.+]] = load i32, i32* [[I_PTR]], align 4
; CHECK: invoke void @"\01?handle_int@@YAXH@Z"(i32 [[TMP1]])
; CHECK: to label %invoke.cont2 unwind label %[[LPAD1_LABEL:lpad[0-9]*]]
@@ -152,13 +148,12 @@ eh.resume: ; %catch.dispatch3
; CHECK: [[LPAD1_LABEL]]:{{[ ]+}}; preds = %entry
; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
-; ------------================= FAIL here =================------------
-; CHECK: [[RECOVER1:\%.+]] = call i8* (...)* @llvm.eh.actions({ i8*, i32 } [[LPAD1_VAL]], i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), float* [[F_PTR]], i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1")
+; CHECK: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1")
; CHECK: indirectbr i8* [[RECOVER1]], []
;
; CHECK: }
-; CHECK: define internal i8* @"\01?test@@YAXXZ.catch1"(i8*, i8*) {
+; CHECK: define internal i8* @"\01?test@@YAXXZ.catch1"(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_F1:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
; CHECK: [[F_PTR1:\%.+]] = bitcast i8* [[RECOVER_F1]] to float*
diff --git a/test/CodeGen/WinEH/cppeh-nested-2.ll b/test/CodeGen/WinEH/cppeh-nested-2.ll
index 3479c41..ebcd3c9 100644
--- a/test/CodeGen/WinEH/cppeh-nested-2.ll
+++ b/test/CodeGen/WinEH/cppeh-nested-2.ll
@@ -38,13 +38,13 @@ target triple = "x86_64-pc-windows-msvc"
@_ZTIi = external constant i8*
; The function entry should be rewritten like this.
-; CHECK: define void @_Z4testv() #0 {
+; CHECK: define void @_Z4testv()
; CHECK: entry:
; CHECK: %outer = alloca %class.Outer, align 1
; CHECK: %inner = alloca %class.Inner, align 1
; CHECK: %i = alloca i32, align 4
; CHECK: %f = alloca float, align 4
-; CHECK: call void (...)* @llvm.frameescape(float* %f, i32* %i, %class.Outer* %outer, %class.Inner* %inner)
+; CHECK: call void (...) @llvm.frameescape(float* %f, i32* %i, %class.Outer* %outer, %class.Inner* %inner)
; CHECK: invoke void @_ZN5OuterC1Ev(%class.Outer* %outer)
; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
@@ -93,7 +93,7 @@ invoke.cont5: ; preds = %invoke.cont4
; CHECK: [[LPAD_LABEL]]:
; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIf to i8*)
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...)* @llvm.eh.actions(i32 1, i8* bitcast (i8** @_ZTIf to i8*), float* %f, i8* (i8*, i8*)* @_Z4testv.catch)
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i8** @_ZTIf to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont19]
lpad: ; preds = %try.cont, %entry
@@ -110,10 +110,10 @@ lpad: ; preds = %try.cont, %entry
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIf to i8*)
-; CHECK-NEXT: [[RECOVER1:\%.+]] = call i8* (...)* @llvm.eh.actions(
-; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32* %i, i8* (i8*, i8*)* @_Z4testv.catch1,
+; CHECK-NEXT: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(
+; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32 1, i8* (i8*, i8*)* @_Z4testv.catch1,
; CHECK-SAME: i32 0, void (i8*, i8*)* @_Z4testv.cleanup,
-; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIf to i8*), float* %f, i8* (i8*, i8*)* @_Z4testv.catch)
+; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIf to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
; CHECK-NEXT: indirectbr i8* [[RECOVER1]], [label %try.cont, label %try.cont19]
lpad1: ; preds = %invoke.cont4, %invoke.cont
@@ -132,11 +132,11 @@ lpad1: ; preds = %invoke.cont4, %invo
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIf to i8*)
-; CHECK-NEXT: [[RECOVER3:\%.+]] = call i8* (...)* @llvm.eh.actions(
+; CHECK-NEXT: [[RECOVER3:\%.+]] = call i8* (...) @llvm.eh.actions(
; CHECK-SAME: i32 0, void (i8*, i8*)* @_Z4testv.cleanup2,
-; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32* %i, i8* (i8*, i8*)* @_Z4testv.catch1,
+; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32 1, i8* (i8*, i8*)* @_Z4testv.catch1,
; CHECK-SAME: i32 0, void (i8*, i8*)* @_Z4testv.cleanup,
-; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIf to i8*), float* %f, i8* (i8*, i8*)* @_Z4testv.catch)
+; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIf to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
; CHECK-NEXT: indirectbr i8* [[RECOVER3]], [label %try.cont, label %try.cont19]
lpad3: ; preds = %invoke.cont2
@@ -241,7 +241,7 @@ eh.resume: ; preds = %catch.dispatch11
}
; This catch handler should be outlined.
-; CHECK: define internal i8* @_Z4testv.catch(i8*, i8*) {
+; CHECK: define internal i8* @_Z4testv.catch(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_F:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 0)
; CHECK: [[F_PTR:\%.+]] = bitcast i8* [[RECOVER_F]] to float*
@@ -251,7 +251,7 @@ eh.resume: ; preds = %catch.dispatch11
; CHECK: }
; This catch handler should be outlined.
-; CHECK: define internal i8* @_Z4testv.catch1(i8*, i8*) {
+; CHECK: define internal i8* @_Z4testv.catch1(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 1)
; CHECK: [[I_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
@@ -268,7 +268,7 @@ eh.resume: ; preds = %catch.dispatch11
; CHECK: }
; This cleanup handler should be outlined.
-; CHECK: define internal void @_Z4testv.cleanup(i8*, i8*) {
+; CHECK: define internal void @_Z4testv.cleanup(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_OUTER:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 2)
; CHECK: [[OUTER_PTR:\%.+]] = bitcast i8* [[RECOVER_OUTER]] to %class.Outer*
@@ -277,7 +277,7 @@ eh.resume: ; preds = %catch.dispatch11
; CHECK: }
; This cleanup handler should be outlined.
-; CHECK: define internal void @_Z4testv.cleanup2(i8*, i8*) {
+; CHECK: define internal void @_Z4testv.cleanup2(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_INNER:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 3)
; CHECK: [[INNER_PTR:\%.+]] = bitcast i8* [[RECOVER_INNER]] to %class.Inner*
diff --git a/test/CodeGen/WinEH/cppeh-nested-3.ll b/test/CodeGen/WinEH/cppeh-nested-3.ll
index d28fa4d..4e33c55 100644
--- a/test/CodeGen/WinEH/cppeh-nested-3.ll
+++ b/test/CodeGen/WinEH/cppeh-nested-3.ll
@@ -1,5 +1,4 @@
; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-; XFAIL: *
; This test is based on the following code:
;
@@ -37,14 +36,12 @@ $"\01??_R0H@8" = comdat any
@"\01??_R0M@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".M\00" }, comdat
@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-; CHECK: define void @"\01?test@@YAXXZ"() #0 {
+; CHECK: define void @"\01?test@@YAXXZ"()
; CHECK: entry:
; CHECK: %i = alloca i32, align 4
-; ------------================= FAIL here =================------------
; CHECK: %j = alloca i32, align 4
; CHECK: %f = alloca float, align 4
-; ------------================= FAIL here =================------------
-; CHECK: call void (...)* @llvm.frameescape(i32* %i, float* %f, int32* %j)
+; CHECK: call void (...) @llvm.frameescape(i32* %i, float* %f, i32* %j)
; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
@@ -66,7 +63,7 @@ invoke.cont: ; preds = %entry
; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
-; CHECK: [[RECOVER:\%.+]] = call i8* (...)* @llvm.eh.actions(i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32* %i, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch" to i8*), i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), float* %f, i8* bitcast (i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1")
+; CHECK: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1")
; CHECK: indirectbr i8* [[RECOVER]], [label %try.cont10, label %try.cont19]
lpad: ; preds = %entry
@@ -182,20 +179,14 @@ eh.resume: ; preds = %lpad16, %catch.disp
; CHECK: }
}
-; CHECK: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*) {
+; CHECK: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
; CHECK: [[I_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
-; ------------================= FAIL here =================------------
-; CHECK: [[RECOVER_F:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
-; CHECK: [[F_PTR:\%.+]] = bitcast i8* [[RECOVER_F]] to float*
-; ------------================= FAIL here =================------------
-; CHECK: [[RECOVER_J:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
-; CHECK: [[J_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
; CHECK: to label %invoke.cont2 unwind label %[[LPAD1_LABEL:lpad[0-9]*]]
;
-; CHECK: invoke.cont2: ; preds = %entry
+; CHECK: invoke.cont2: ; preds = %[[LPAD1_LABEL]], %entry
; CHECK: [[TMP1:\%.+]] = load i32, i32* [[I_PTR]], align 4
; CHECK: invoke void @"\01?handle_int@@YAXH@Z"(i32 [[TMP1]])
; CHECK: to label %invoke.cont9 unwind label %[[LPAD8_LABEL:lpad[0-9]*]]
@@ -204,9 +195,8 @@ eh.resume: ; preds = %lpad16, %catch.disp
; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
-; ------------================= FAIL here =================------------
-; CHECK: [[RECOVER1:\%.+]] = call i8* (...)* @llvm.eh.actions({ i8*, i32 } [[LPAD1_VAL]], i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32* [[J_PTR]], i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch2" to i8*), i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), float* [[F_PTR1]], i8* bitcast (i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1")
-; CHECK: indirectbr i8* [[RECOVER1]], []
+; CHECK: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch2", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1")
+; CHECK: indirectbr i8* [[RECOVER1]], [label %invoke.cont2]
;
; CHECK: invoke.cont9:
; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont10)
@@ -214,30 +204,28 @@ eh.resume: ; preds = %lpad16, %catch.disp
; CHECK: [[LPAD8_LABEL]]:{{[ ]+}}; preds = %invoke.cont2
; CHECK: [[LPAD8_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
-; ------------================= FAIL here =================------------
-; CHECK: [[RECOVER2:\%.+]] = call i8* (...)* @llvm.eh.actions({ i8*, i32 } [[LPAD8_VAL]], i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), float* [[F_PTR1]], i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1")
+; CHECK: [[RECOVER2:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1")
; CHECK: indirectbr i8* [[RECOVER2]], []
;
; CHECK: }
-; CHECK: define internal i8* @"\01?test@@YAXXZ.catch1"(i8*, i8*) {
+; CHECK: define internal i8* @"\01?test@@YAXXZ.catch1"(i8*, i8*)
; CHECK: entry:
-; CHECK: [[RECOVER_F1:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
-; CHECK: [[F_PTR1:\%.+]] = bitcast i8* [[RECOVER_F1]] to float*
-; CHECK: [[TMP2:\%.+]] = load float, float* [[F_PTR1]], align 4
+; CHECK: [[RECOVER_F:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
+; CHECK: [[F_PTR:\%.+]] = bitcast i8* [[RECOVER_F]] to float*
+; CHECK: [[TMP2:\%.+]] = load float, float* [[F_PTR]], align 4
; CHECK: call void @"\01?handle_float@@YAXM@Z"(float [[TMP2]])
; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont19)
; CHECK: }
-; CHECK: define internal i8* @"\01?test@@YAXXZ.catch2"(i8*, i8*) {
+; CHECK: define internal i8* @"\01?test@@YAXXZ.catch2"(i8*, i8*)
; CHECK: entry:
-; ------------================= FAIL here =================------------
-; SHOULD-CHECK: [[J_PTR1:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
+; CHECK: [[RECOVER_J:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
+; CHECK: [[J_PTR:\%.+]] = bitcast i8* [[RECOVER_J]] to i32*
; CHECK: [[RECOVER_I1:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
; CHECK: [[I_PTR1:\%.+]] = bitcast i8* [[RECOVER_I1]] to i32*
-; CHECK: [[TMP3:\%.+]] = load i32, i32* [[J_PTR1]], align 4
+; CHECK: [[TMP3:\%.+]] = load i32, i32* [[J_PTR]], align 4
; CHECK: store i32 [[TMP3]], i32* [[I_PTR1]]
-; ------------================= FAIL here =================------------
; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ.catch", %invoke.cont2)
; CHECK: }
diff --git a/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll b/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll
index 41d9006..de873d1 100644
--- a/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll
+++ b/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll
@@ -51,10 +51,12 @@ $"\01??_R0H@8" = comdat any
@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
; The function entry should be rewritten like this.
-; CHECK: define void @"\01?test@@YAXXZ"() #0 {
+; CHECK: define void @"\01?test@@YAXXZ"()
; CHECK: entry:
; CHECK: [[NUMEXCEPTIONS_REGMEM:\%.+]] = alloca i32
; CHECK: [[I_REGMEM:\%.+]] = alloca i32
+; CHECK: [[A_REGMEM:\%.+]] = alloca i32*
+; CHECK: [[B_REGMEM:\%.+]] = alloca i32*
; CHECK: [[E_PTR:\%.+]] = alloca i32, align 4
; CHECK: [[EXCEPTIONVAL:\%.+]] = alloca [10 x i32], align 16
; CHECK: [[DATA_PTR:\%.+]] = alloca i64, align 8
@@ -62,15 +64,13 @@ $"\01??_R0H@8" = comdat any
; CHECK: [[TMP:\%.+]] = bitcast [10 x i32]* [[EXCEPTIONVAL]] to i8*
; CHECK: call void @llvm.lifetime.start(i64 40, i8* [[TMP]])
; CHECK: store i64 0, i64* [[DATA_PTR]], align 8
-; CHECK: [[A_REGMEM:\%.+]] = alloca i32*
; CHECK: [[A_PTR:\%.+]] = bitcast i64* [[DATA_PTR]] to i32*
; CHECK: store i32* [[A_PTR]], i32** [[A_REGMEM]]
; CHECK: [[B_PTR:\%.+]] = getelementptr inbounds %struct.SomeData, %struct.SomeData* [[TMPCAST]], i64 0, i32 1
-; CHECK: [[B_REGMEM:\%.+]] = alloca i32*
; CHECK: store i32* [[B_PTR]], i32** [[B_REGMEM]]
; CHECK: store i32 0, i32* [[NUMEXCEPTIONS_REGMEM]]
; CHECK: store i32 0, i32* [[I_REGMEM]]
-; CHECK: call void (...)* @llvm.frameescape(i32* %e, i32* %NumExceptions.020.reg2mem, [10 x i32]* [[EXCEPTIONVAL]], i32* [[I_REGMEM]], i32** [[A_REGMEM]], i32** [[B_REGMEM]])
+; CHECK: call void (...) @llvm.frameescape(i32* %e, i32* %NumExceptions.020.reg2mem, [10 x i32]* [[EXCEPTIONVAL]], i32* [[I_REGMEM]], i32** [[A_REGMEM]], i32** [[B_REGMEM]])
; CHECK: br label %for.body
; Function Attrs: uwtable
@@ -114,7 +114,7 @@ invoke.cont: ; preds = %for.body
; CHECK: [[LPAD_LABEL:lpad[0-9]*]]:{{[ ]+}}; preds = %for.body
; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...)* @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32* %e, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont]
lpad: ; preds = %for.body
@@ -190,7 +190,7 @@ eh.resume: ; preds = %lpad
}
; The following catch handler should be outlined.
-; CHECK: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*) {
+; CHECK: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
; CHECK: entry:
; CHECK: [[RECOVER_E:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
; CHECK: [[E_PTR:\%.+]] = bitcast i8* [[RECOVER_E]] to i32*
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll b/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll
new file mode 100644
index 0000000..f395d64
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; This test case is equivalent to:
+; extern "C" void may_throw();
+; extern "C" void test_catch_all() {
+; try {
+; may_throw();
+; } catch (...) {
+; }
+; }
+
+declare void @may_throw() #1
+declare i32 @__CxxFrameHandler3(...)
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #2
+declare void @llvm.eh.endcatch() #2
+
+; Function Attrs: nounwind uwtable
+define void @test_catch_all() #0 {
+entry:
+ invoke void @may_throw()
+ to label %try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ tail call void @llvm.eh.begincatch(i8* %1, i8* null) #2
+ tail call void @llvm.eh.endcatch() #2
+ br label %try.cont
+
+try.cont: ; preds = %entry, %lpad
+ ret void
+}
+
+; CHECK-LABEL: $handlerMap$0$test_catch_all:
+; CHECK: .long {{[0-9]+}}
+; CHECK: .long 0
+; CHECK: .long 0
+; CHECK: .long test_catch_all.catch@IMGREL
+; CHECK: .long .Ltest_catch_all.catch$parent_frame_offset
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll b/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll
new file mode 100644
index 0000000..4946c6a
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll
@@ -0,0 +1,163 @@
+; RUN: llc < %s | FileCheck %s
+
+; Verify that we get the right frame escape label when the catch comes after the
+; parent function.
+
+; This test case is equivalent to:
+; int main() {
+; try {
+; throw 42;
+; } catch (int e) {
+; printf("e: %d\n", e);
+; }
+; }
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
+%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+%eh.CatchHandlerType = type { i32, i8* }
+
+$"\01??_R0H@8" = comdat any
+
+$"_CT??_R0H@84" = comdat any
+
+$_CTA1H = comdat any
+
+$_TI1H = comdat any
+
+$"\01??_C@_06PNOAJMHG@e?3?5?$CFd?6?$AA@" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@__ImageBase = external constant i8
+@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
+@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
+@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
+@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
+@"\01??_C@_06PNOAJMHG@e?3?5?$CFd?6?$AA@" = linkonce_odr unnamed_addr constant [7 x i8] c"e: %d\0A\00", comdat, align 1
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+; Function Attrs: uwtable
+define i32 @main() #1 {
+entry:
+ %tmp.i = alloca i32, align 4
+ %e = alloca i32, align 4
+ %0 = bitcast i32* %tmp.i to i8*
+ store i32 42, i32* %tmp.i, align 4, !tbaa !2
+ call void (...) @llvm.frameescape(i32* %e)
+ invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #6
+ to label %.noexc unwind label %lpad1
+
+.noexc: ; preds = %entry
+ unreachable
+
+lpad1: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 0, i8* (i8*, i8*)* @main.catch)
+ indirectbr i8* %recover, [label %try.cont.split]
+
+try.cont.split: ; preds = %lpad1
+ ret i32 0
+}
+
+; CHECK-LABEL: main:
+; CHECK: .seh_handlerdata
+; CHECK: .long ($cppxdata$main)@IMGREL
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) #4
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #3
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.eh.actions(...) #3
+
+define internal i8* @main.catch(i8*, i8*) #5 {
+entry:
+ %e.i8 = call i8* @llvm.framerecover(i8* bitcast (i32 ()* @main to i8*), i8* %1, i32 0)
+ %e = bitcast i8* %e.i8 to i32*
+ %2 = bitcast i32* %e to i8*
+ %3 = load i32, i32* %e, align 4, !tbaa !2
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @"\01??_C@_06PNOAJMHG@e?3?5?$CFd?6?$AA@", i64 0, i64 0), i32 %3)
+ invoke void @llvm.donothing()
+ to label %entry.split unwind label %stub
+
+entry.split: ; preds = %entry
+ ret i8* blockaddress(@main, %try.cont.split)
+
+stub: ; preds = %entry
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ unreachable
+}
+
+; CHECK-LABEL: main.catch:
+; CHECK: .seh_handlerdata
+; CHECK: .long ($cppxdata$main)@IMGREL
+
+; CHECK-NEXT: $cppxdata$main:
+; CHECK-NEXT: .long 429065506
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long ($stateUnwindMap$main)@IMGREL
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ($tryMap$main)@IMGREL
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ($ip2state$main)@IMGREL
+; CHECK-NEXT: .long 40
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1
+
+; Make sure we get the right frame escape label.
+
+; CHECK: $handlerMap$0$main:
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long "??_R0H@8"@IMGREL
+; CHECK-NEXT: .long .Lmain$frame_escape_0
+; CHECK-NEXT: .long main.catch@IMGREL
+; CHECK-NEXT: .long .Lmain.catch$parent_frame_offset
+
+; Function Attrs: nounwind readnone
+declare void @llvm.donothing() #2
+
+; Function Attrs: nounwind
+declare void @llvm.frameescape(...) #3
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.framerecover(i8*, i8*, i32) #2
+
+attributes #0 = { noreturn uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="main" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+attributes #4 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #5 = { "wineh-parent"="main" }
+attributes #6 = { noreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 "}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch.ll b/test/CodeGen/WinEH/cppeh-prepared-catch.ll
new file mode 100644
index 0000000..98b4afc
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-prepared-catch.ll
@@ -0,0 +1,203 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; This test case is equivalent to:
+; void f() {
+; try {
+; try {
+; may_throw();
+; } catch (int &) {
+; may_throw();
+; }
+; may_throw();
+; } catch (double) {
+; }
+; }
+
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchHandlerType = type { i32, i8* }
+
+$"\01??_R0N@8" = comdat any
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0N@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".N\00" }, comdat
+@llvm.eh.handlertype.N.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0N@8" to i8*) }, section "llvm.metadata"
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@llvm.eh.handlertype.H.8 = private unnamed_addr constant %eh.CatchHandlerType { i32 8, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
+
+define internal i8* @"\01?f@@YAXXZ.catch"(i8*, i8*) #4 {
+entry:
+ %.i8 = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?f@@YAXXZ" to i8*), i8* %1, i32 0)
+ %bc2 = bitcast i8* %.i8 to i32**
+ %bc3 = bitcast i32** %bc2 to i8*
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %entry
+ ret i8* blockaddress(@"\01?f@@YAXXZ", %try.cont)
+
+lpad1: ; preds = %entry
+ %lp4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.N.0
+ %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.N.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch1")
+ indirectbr i8* %recover, [label %invoke.cont2]
+}
+
+; CHECK-LABEL: "?f@@YAXXZ.catch":
+; No code should be generated for the indirectbr.
+; CHECK-NOT: jmpq *
+; CHECK: .seh_handlerdata
+; CHECK: .long ("$cppxdata$?f@@YAXXZ")@IMGREL
+
+
+define internal i8* @"\01?f@@YAXXZ.catch1"(i8*, i8*) #4 {
+entry:
+ %.i8 = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?f@@YAXXZ" to i8*), i8* %1, i32 1)
+ %2 = bitcast i8* %.i8 to double*
+ %3 = bitcast double* %2 to i8*
+ invoke void (...)* @llvm.donothing()
+ to label %done unwind label %lpad
+
+done:
+ ret i8* blockaddress(@"\01?f@@YAXXZ", %try.cont8)
+
+lpad: ; preds = %entry
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ unreachable
+}
+
+; CHECK-LABEL: "?f@@YAXXZ.catch1":
+; No code should be generated for the indirectbr.
+; CHECK-NOT: jmpq *
+; CHECK: ".L?f@@YAXXZ.catch1$parent_frame_offset" = 16
+; CHECK: movq %rdx, 16(%rsp)
+; CHECK: .seh_handlerdata
+; CHECK: .long ("$cppxdata$?f@@YAXXZ")@IMGREL
+
+define void @"\01?f@@YAXXZ"() #0 {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %0 = alloca i32*, align 8
+ %1 = alloca double, align 8
+ call void (...) @llvm.frameescape(i32** %0, double* %1)
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad2
+
+invoke.cont: ; preds = %entry
+ br label %try.cont
+
+lpad2: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.8
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.N.0
+ %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.8 to i8*), i32 0, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch", i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.N.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch1")
+ indirectbr i8* %recover, [label %try.cont, label %try.cont8]
+
+try.cont: ; preds = %lpad2, %invoke.cont
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %try.cont8 unwind label %lpad1
+
+lpad1:
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.N.0
+ %recover2 = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.N.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch1")
+ indirectbr i8* %recover2, [label %try.cont8]
+
+try.cont8: ; preds = %lpad2, %try.cont
+ ret void
+}
+
+; CHECK-LABEL: "?f@@YAXXZ":
+; No code should be generated for the indirectbr.
+; CHECK-NOT: jmpq *
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long ("$cppxdata$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT:"$cppxdata$?f@@YAXXZ":
+; CHECK-NEXT: .long 429065506
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long ("$stateUnwindMap$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long ("$tryMap$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .long ("$ip2state$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 32
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1
+; CHECK-NEXT:"$stateUnwindMap$?f@@YAXXZ":
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long 0
+; CHECK-NEXT:"$tryMap$?f@@YAXXZ":
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ("$handlerMap$0$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ("$handlerMap$1$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT:"$handlerMap$0$?f@@YAXXZ":
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .long "??_R0H@8"@IMGREL
+; CHECK-NEXT: .long ".L?f@@YAXXZ$frame_escape_0"
+; CHECK-NEXT: .long "?f@@YAXXZ.catch"@IMGREL
+; CHECK-NEXT: .long ".L?f@@YAXXZ.catch$parent_frame_offset"
+; CHECK-NEXT:"$handlerMap$1$?f@@YAXXZ":
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long "??_R0N@8"@IMGREL
+; CHECK-NEXT: .long ".L?f@@YAXXZ$frame_escape_1"
+; CHECK-NEXT: .long "?f@@YAXXZ.catch1"@IMGREL
+; CHECK-NEXT: .long ".L?f@@YAXXZ.catch1$parent_frame_offset"
+; CHECK-NEXT:"$ip2state$?f@@YAXXZ":
+; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long .Ltmp13@IMGREL
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long .Ltmp16@IMGREL
+; CHECK-NEXT: .long 0
+
+
+declare void @"\01?may_throw@@YAXXZ"() #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.eh.actions(...) #3
+
+; Function Attrs: nounwind
+declare void @llvm.frameescape(...) #3
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.framerecover(i8*, i8*, i32) #2
+
+declare void @llvm.donothing(...)
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="?f@@YAXXZ" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+attributes #4 = { "wineh-parent"="?f@@YAXXZ" }
diff --git a/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll b/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll
new file mode 100644
index 0000000..b958589
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll
@@ -0,0 +1,241 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
+%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+%struct.S = type { i8 }
+
+$"\01??_DS@@QEAA@XZ" = comdat any
+
+$"\01??_R0H@8" = comdat any
+
+$"_CT??_R0H@84" = comdat any
+
+$_CTA1H = comdat any
+
+$_TI1H = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@__ImageBase = external constant i8
+@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
+@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
+@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
+
+
+; CHECK-LABEL: "?test1@@YAXXZ":
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long ("$cppxdata$?test1@@YAXXZ")@IMGREL
+; CHECK-NEXT:"$cppxdata$?test1@@YAXXZ":
+; CHECK-NEXT: .long 429065506
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ("$stateUnwindMap$?test1@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ("$ip2state$?test1@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 32
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1
+; CHECK-NEXT:"$stateUnwindMap$?test1@@YAXXZ":
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long "?test1@@YAXXZ.cleanup"@IMGREL
+; CHECK-NEXT:"$ip2state$?test1@@YAXXZ":
+; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long 0
+
+define void @"\01?test1@@YAXXZ"() #0 {
+entry:
+ %unwindhelp = alloca i64
+ %tmp = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 0, i32* %tmp
+ %0 = bitcast i32* %tmp to i8*
+ call void (...) @llvm.frameescape()
+ store volatile i64 -2, i64* %unwindhelp
+ %1 = bitcast i64* %unwindhelp to i8*
+ call void @llvm.eh.unwindhelp(i8* %1)
+ invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #8
+ to label %unreachable unwind label %lpad1
+
+lpad1: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %recover = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test1@@YAXXZ.cleanup")
+ indirectbr i8* %recover, []
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind
+define linkonce_odr void @"\01??_DS@@QEAA@XZ"(%struct.S* %this) unnamed_addr #1 comdat align 2 {
+entry:
+ %this.addr = alloca %struct.S*, align 8
+ store %struct.S* %this, %struct.S** %this.addr, align 8
+ %this1 = load %struct.S*, %struct.S** %this.addr
+ call void @"\01??1S@@QEAA@XZ"(%struct.S* %this1) #4
+ ret void
+}
+
+; CHECK-LABEL: "?test2@@YAX_N@Z":
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long ("$cppxdata$?test2@@YAX_N@Z")@IMGREL
+; CHECK-NEXT:"$cppxdata$?test2@@YAX_N@Z":
+; CHECK-NEXT: .long 429065506
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long ("$stateUnwindMap$?test2@@YAX_N@Z")@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long ("$ip2state$?test2@@YAX_N@Z")@IMGREL
+; CHECK-NEXT: .long 40
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1
+; CHECK-NEXT:"$stateUnwindMap$?test2@@YAX_N@Z":
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long "?test2@@YAX_N@Z.cleanup"@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long "?test2@@YAX_N@Z.cleanup1"@IMGREL
+; CHECK-NEXT:"$ip2state$?test2@@YAX_N@Z":
+; CHECK-NEXT: .long .Lfunc_begin1@IMGREL
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long .Ltmp7@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long .Ltmp9@IMGREL
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long .Ltmp12@IMGREL
+; CHECK-NEXT: .long 0
+
+define void @"\01?test2@@YAX_N@Z"(i1 zeroext %b) #2 {
+ %b.addr = alloca i8, align 1
+ %s = alloca %struct.S, align 1
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %s1 = alloca %struct.S, align 1
+ %frombool = zext i1 %b to i8
+ store i8 %frombool, i8* %b.addr, align 1
+ call void (...) @llvm.frameescape(%struct.S* %s, %struct.S* %s1)
+ call void @"\01?may_throw@@YAXXZ"()
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad1
+
+invoke.cont: ; preds = %entry
+ %1 = load i8, i8* %b.addr, align 1
+ %tobool = trunc i8 %1 to i1
+ br i1 %tobool, label %if.then, label %if.else
+
+if.then: ; preds = %invoke.cont
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont3 unwind label %lpad3
+
+invoke.cont3: ; preds = %if.then
+ call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s1) #4
+ br label %if.end
+
+lpad1: ; preds = %entry, %if.end
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %recover = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test2@@YAX_N@Z.cleanup")
+ indirectbr i8* %recover, []
+
+lpad3: ; preds = %if.then
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %recover4 = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test2@@YAX_N@Z.cleanup1", i32 0, void (i8*, i8*)* @"\01?test2@@YAX_N@Z.cleanup")
+ indirectbr i8* %recover4, []
+
+if.else: ; preds = %invoke.cont
+ call void @"\01?dont_throw@@YAXXZ"() #4
+ br label %if.end
+
+if.end: ; preds = %if.else, %invoke.cont3
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont4 unwind label %lpad1
+
+invoke.cont4: ; preds = %if.end
+ call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s) #4
+ ret void
+}
+
+declare void @"\01?may_throw@@YAXXZ"() #3
+
+; Function Attrs: nounwind
+declare void @"\01?dont_throw@@YAXXZ"() #1
+
+; Function Attrs: nounwind
+declare void @"\01??1S@@QEAA@XZ"(%struct.S*) #1
+
+; Function Attrs: nounwind
+declare i8* @llvm.eh.actions(...) #4
+
+define internal void @"\01?test1@@YAXXZ.cleanup"(i8*, i8*) #5 {
+entry:
+ %s = alloca %struct.S, align 1
+ call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s) #4
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.frameescape(...) #4
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.framerecover(i8*, i8*, i32) #6
+
+; Function Attrs: nounwind
+declare void @llvm.eh.unwindhelp(i8*) #4
+
+define internal void @"\01?test2@@YAX_N@Z.cleanup"(i8*, i8*) #7 {
+entry:
+ %s.i8 = call i8* @llvm.framerecover(i8* bitcast (void (i1)* @"\01?test2@@YAX_N@Z" to i8*), i8* %1, i32 0)
+ %s = bitcast i8* %s.i8 to %struct.S*
+ call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s) #4
+ invoke void @llvm.donothing()
+ to label %entry.split unwind label %stub
+
+entry.split: ; preds = %entry
+ ret void
+
+stub: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ unreachable
+}
+
+define internal void @"\01?test2@@YAX_N@Z.cleanup1"(i8*, i8*) #7 {
+entry:
+ %s1.i8 = call i8* @llvm.framerecover(i8* bitcast (void (i1)* @"\01?test2@@YAX_N@Z" to i8*), i8* %1, i32 1)
+ %s1 = bitcast i8* %s1.i8 to %struct.S*
+ call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s1) #4
+ invoke void @llvm.donothing()
+ to label %entry.split unwind label %stub
+
+entry.split: ; preds = %entry
+ ret void
+
+stub: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ unreachable
+}
+
+declare void @llvm.donothing()
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="?test1@@YAXXZ" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="?test2@@YAX_N@Z" }
+attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind }
+attributes #5 = { "wineh-parent"="?test1@@YAXXZ" }
+attributes #6 = { nounwind readnone }
+attributes #7 = { "wineh-parent"="?test2@@YAX_N@Z" }
+attributes #8 = { noreturn }
diff --git a/test/CodeGen/WinEH/seh-catch-all.ll b/test/CodeGen/WinEH/seh-catch-all.ll
new file mode 100644
index 0000000..fb2b9ba
--- /dev/null
+++ b/test/CodeGen/WinEH/seh-catch-all.ll
@@ -0,0 +1,59 @@
+; RUN: opt -S -winehprepare -sehprepare < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+@str.__except = internal unnamed_addr constant [9 x i8] c"__except\00", align 1
+
+; Function Attrs: uwtable
+
+declare i32 @puts(i8*)
+
+define void @may_crash() {
+entry:
+ store volatile i32 42, i32* null, align 4
+ ret void
+}
+
+declare i32 @__C_specific_handler(...)
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.frameaddress(i32)
+
+; Function Attrs: uwtable
+define void @seh_catch_all() {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ invoke void @may_crash()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ br label %__try.cont
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ store i8* %1, i8** %exn.slot
+ %2 = extractvalue { i8*, i32 } %0, 1
+ store i32 %2, i32* %ehselector.slot
+ br label %__except
+
+__except: ; preds = %lpad
+ %call = call i32 @puts(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @str.__except, i32 0, i32 0))
+ br label %__try.cont
+
+__try.cont: ; preds = %__except, %invoke.cont
+ ret void
+}
+
+; CHECK-LABEL: define void @seh_catch_all()
+; CHECK: landingpad
+; CHECK-NEXT: catch i8* null
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* blockaddress(@seh_catch_all, %catch.all))
+; CHECK-NEXT: indirectbr
+;
+; CHECK: catch.all:
+; CHECK-NOT: extractvalue
+; CHECK: call i32 @puts
diff --git a/test/CodeGen/WinEH/seh-inlined-finally.ll b/test/CodeGen/WinEH/seh-inlined-finally.ll
new file mode 100644
index 0000000..2e6171a
--- /dev/null
+++ b/test/CodeGen/WinEH/seh-inlined-finally.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -winehprepare -sehprepare < %s | FileCheck %s
+
+; Check that SEH preparation still emits the expected cleanup actions when the
+; mid-level optimizer has inlined the finally block.
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+declare i32 @puts(i8*)
+declare void @may_crash()
+declare i32 @__C_specific_handler(...)
+
+define void @use_finally() {
+entry:
+ invoke void @may_crash()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ %call.i = tail call i32 @puts(i8* null)
+ ret void
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ cleanup
+ %call.i2 = tail call i32 @puts(i8* null)
+ resume { i8*, i32 } %0
+}
+
+; CHECK-LABEL: define void @use_finally()
+; CHECK: invoke void @may_crash()
+;
+; CHECK: landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @use_finally.cleanup)
+; CHECK-NEXT: indirectbr i8* %recover, []
diff --git a/test/CodeGen/WinEH/seh-outlined-finally.ll b/test/CodeGen/WinEH/seh-outlined-finally.ll
new file mode 100644
index 0000000..bc9d322
--- /dev/null
+++ b/test/CodeGen/WinEH/seh-outlined-finally.ll
@@ -0,0 +1,155 @@
+; RUN: opt -S -winehprepare -sehprepare -mtriple=x86_64-windows-msvc < %s | FileCheck %s
+
+; Test case based on this code:
+;
+; extern "C" int _abnormal_termination();
+; #pragma intrinsic(_abnormal_termination)
+; extern "C" int printf(const char *, ...);
+; extern "C" void may_crash() {
+; *(volatile int *)0 = 42;
+; }
+; int main() {
+; int myres = 0;
+; __try {
+; __try {
+; may_crash();
+; } __finally {
+; printf("inner finally %d\n", _abnormal_termination());
+; may_crash();
+; }
+; } __finally {
+; printf("outer finally %d\n", _abnormal_termination());
+; }
+; }
+;
+; Note that if the inner finally crashes, the outer finally still runs. There
+; is nothing like a std::terminate call in this situation.
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+@str_outer_finally = linkonce_odr unnamed_addr constant [18 x i8] c"outer finally %d\0A\00", align 1
+@str_inner_finally = linkonce_odr unnamed_addr constant [18 x i8] c"inner finally %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define void @may_crash() #0 {
+entry:
+ store volatile i32 42, i32* null, align 4
+ ret void
+}
+
+; Function Attrs: uwtable
+define i32 @main() #1 {
+entry:
+ %myres = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 0, i32* %myres, align 4
+ invoke void @may_crash() #4
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ invoke void @"\01?fin$1@0@main@@"(i1 zeroext false, i8* %0) #4
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %invoke.cont
+ %1 = call i8* @llvm.frameaddress(i32 0)
+ call void @"\01?fin$0@0@main@@"(i1 zeroext false, i8* %1)
+ ret i32 0
+
+lpad: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ cleanup
+ %3 = extractvalue { i8*, i32 } %2, 0
+ store i8* %3, i8** %exn.slot
+ %4 = extractvalue { i8*, i32 } %2, 1
+ store i32 %4, i32* %ehselector.slot
+ %5 = call i8* @llvm.frameaddress(i32 0)
+ invoke void @"\01?fin$1@0@main@@"(i1 zeroext true, i8* %5) #4
+ to label %invoke.cont3 unwind label %lpad1
+
+lpad1: ; preds = %lpad, %invoke.cont
+ %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ cleanup
+ %7 = extractvalue { i8*, i32 } %6, 0
+ store i8* %7, i8** %exn.slot
+ %8 = extractvalue { i8*, i32 } %6, 1
+ store i32 %8, i32* %ehselector.slot
+ br label %ehcleanup
+
+invoke.cont3: ; preds = %lpad
+ br label %ehcleanup
+
+ehcleanup: ; preds = %invoke.cont3, %lpad1
+ %9 = call i8* @llvm.frameaddress(i32 0)
+ call void @"\01?fin$0@0@main@@"(i1 zeroext true, i8* %9)
+ br label %eh.resume
+
+eh.resume: ; preds = %ehcleanup
+ %exn = load i8*, i8** %exn.slot
+ %sel = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+ %lpad.val4 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+ resume { i8*, i32 } %lpad.val4
+}
+
+; CHECK-NOT: define internal void @
+
+; CHECK-LABEL: define i32 @main()
+; CHECK: invoke void @may_crash()
+;
+; CHECK: landingpad { i8*, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i1, i8*)* @"\01?fin$1@0@main@@", i32 0, void (i1, i8*)* @"\01?fin$0@0@main@@")
+; CHECK-NEXT: indirectbr
+;
+; CHECK: landingpad { i8*, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i1, i8*)* @"\01?fin$0@0@main@@")
+; CHECK-NEXT: indirectbr
+
+; There should not be any *new* cleanup helpers, just the existing ones.
+; CHECK-NOT: define internal void @
+; CHECK: define internal void @"\01?fin$0@0@main@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer)
+; CHECK-NOT: define internal void @
+; CHECK: define internal void @"\01?fin$1@0@main@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer)
+; CHECK-NOT: define internal void @
+
+define internal void @"\01?fin$0@0@main@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer) #2 {
+entry:
+ %frame_pointer.addr = alloca i8*, align 8
+ %abnormal_termination.addr = alloca i8, align 1
+ store i8* %frame_pointer, i8** %frame_pointer.addr, align 8
+ %frombool = zext i1 %abnormal_termination to i8
+ store i8 %frombool, i8* %abnormal_termination.addr, align 1
+ %0 = zext i1 %abnormal_termination to i32
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @str_outer_finally, i32 0, i32 0), i32 %0)
+ ret void
+}
+
+declare i32 @printf(i8*, ...) #2
+
+define internal void @"\01?fin$1@0@main@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer) #2 {
+entry:
+ %frame_pointer.addr = alloca i8*, align 8
+ %abnormal_termination.addr = alloca i8, align 1
+ store i8* %frame_pointer, i8** %frame_pointer.addr, align 8
+ %frombool = zext i1 %abnormal_termination to i8
+ store i8 %frombool, i8* %abnormal_termination.addr, align 1
+ %0 = zext i1 %abnormal_termination to i32
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @str_inner_finally, i32 0, i32 0), i32 %0)
+ call void @may_crash()
+ ret void
+}
+
+declare i32 @__C_specific_handler(...)
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.frameaddress(i32) #3
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone }
+attributes #4 = { noinline }
diff --git a/test/CodeGen/WinEH/seh-simple.ll b/test/CodeGen/WinEH/seh-simple.ll
index 26b3f83..344a0c8 100644
--- a/test/CodeGen/WinEH/seh-simple.ll
+++ b/test/CodeGen/WinEH/seh-simple.ll
@@ -39,7 +39,7 @@ eh.resume:
; CHECK-LABEL: define i32 @simple_except_store()
; CHECK: landingpad { i8*, i32 }
; CHECK-NEXT: catch i32 ()* @filt
-; CHECK-NEXT: call i8* (...)* @llvm.eh.actions(i32 1, i8* bitcast (i32 ()* @filt to i8*), i8* null, i8* blockaddress(@simple_except_store, %__except))
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@simple_except_store, %__except))
; CHECK-NEXT: indirectbr {{.*}} [label %__except]
define i32 @catch_all() {
@@ -63,7 +63,7 @@ return:
; CHECK-LABEL: define i32 @catch_all()
; CHECK: landingpad { i8*, i32 }
; CHECK-NEXT: catch i8* null
-; CHECK-NEXT: call i8* (...)* @llvm.eh.actions(i32 1, i8* null, i8* null, i8* blockaddress(@catch_all, %catch.all))
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* blockaddress(@catch_all, %catch.all))
; CHECK-NEXT: indirectbr {{.*}} [label %catch.all]
;
; CHECK: catch.all:
@@ -94,7 +94,7 @@ eh.resume:
; CHECK-LABEL: define i32 @except_phi()
; CHECK: landingpad { i8*, i32 }
; CHECK-NEXT: catch i32 ()* @filt
-; CHECK-NEXT: call i8* (...)* @llvm.eh.actions(i32 1, i8* bitcast (i32 ()* @filt to i8*), i8* null, i8* blockaddress(@except_phi, %return))
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@except_phi, %return))
; CHECK-NEXT: indirectbr {{.*}} [label %return]
;
; CHECK: return:
@@ -128,9 +128,9 @@ eh.resume:
; CHECK: landingpad { i8*, i32 }
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch i32 ()* @filt
-; CHECK-NEXT: call i8* (...)* @llvm.eh.actions(
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(
; CHECK: i32 0, void (i8*, i8*)* @cleanup_and_except.cleanup,
-; CHECK: i32 1, i8* bitcast (i32 ()* @filt to i8*), i8* null, i8* blockaddress(@cleanup_and_except, %return))
+; CHECK: i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@cleanup_and_except, %return))
; CHECK-NEXT: indirectbr {{.*}} [label %return]
;
; CHECK: return:
diff --git a/test/CodeGen/X86/2006-10-13-CycleInDAG.ll b/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
index 664da5e..c45469d 100644
--- a/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
@@ -4,7 +4,7 @@
define void @test() {
bb.i:
%tmp.i660 = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=1]
- call void (i32, ...)* @printf( i32 0, i8* getelementptr ([18 x i8], [18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 )
+ call void (i32, ...) @printf( i32 0, i8* getelementptr ([18 x i8], [18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 )
%tmp152.i = load <4 x i32>, <4 x i32>* null ; <<4 x i32>> [#uses=1]
%tmp156.i = bitcast <4 x i32> %tmp152.i to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp175.i = bitcast <4 x float> %tmp.i660 to <4 x i32> ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
index 6b062d5..dd67064 100644
--- a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
+++ b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
@@ -15,11 +15,11 @@ entry:
]
bb: ; preds = %entry
- %tmp1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([14 x i8], [14 x i8]* @str, i32 0, i64 0) ) ; <i32> [#uses=0]
+ %tmp1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([14 x i8], [14 x i8]* @str, i32 0, i64 0) ) ; <i32> [#uses=0]
ret i32 0
bb2: ; preds = %entry
- %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([13 x i8], [13 x i8]* @str.upgrd.1, i32 0, i64 0) ) ; <i32> [#uses=0]
+ %tmp4 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([13 x i8], [13 x i8]* @str.upgrd.1, i32 0, i64 0) ) ; <i32> [#uses=0]
ret i32 0
UnifiedReturnBlock: ; preds = %entry
diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
index 9adfff3..b6a8fc0 100644
--- a/test/CodeGen/X86/2006-11-12-CSRetCC.ll
+++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -51,7 +51,7 @@ entry:
%tmp20 = getelementptr { double, double }, { double, double }* %z, i64 0, i32 0 ; <double*> [#uses=1]
%tmp21 = load double, double* %tmp20 ; <double> [#uses=1]
%tmp.upgrd.6 = getelementptr [9 x i8], [9 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1]
- %tmp.upgrd.7 = call i32 (i8*, ...)* @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 ) ; <i32> [#uses=0]
+ %tmp.upgrd.7 = call i32 (i8*, ...) @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 ) ; <i32> [#uses=0]
br label %finish
finish:
%retval.upgrd.8 = load i32, i32* %retval ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-12-19-IntelSyntax.ll b/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
index f81b303..2c3c5c9 100644
--- a/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
+++ b/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
@@ -21,55 +21,55 @@ entry:
]
bb: ; preds = %entry
- call void (...)* @foo1( )
+ call void (...) @foo1( )
ret void
bb1: ; preds = %entry
- call void (...)* @foo2( )
+ call void (...) @foo2( )
ret void
bb2: ; preds = %entry
- call void (...)* @foo6( )
+ call void (...) @foo6( )
ret void
bb3: ; preds = %entry
- call void (...)* @foo3( )
+ call void (...) @foo3( )
ret void
bb4: ; preds = %entry
- call void (...)* @foo4( )
+ call void (...) @foo4( )
ret void
bb5: ; preds = %entry
- call void (...)* @foo5( )
+ call void (...) @foo5( )
ret void
bb6: ; preds = %entry
- call void (...)* @foo1( )
+ call void (...) @foo1( )
ret void
bb7: ; preds = %entry
- call void (...)* @foo2( )
+ call void (...) @foo2( )
ret void
bb8: ; preds = %entry
- call void (...)* @foo6( )
+ call void (...) @foo6( )
ret void
bb9: ; preds = %entry
- call void (...)* @foo3( )
+ call void (...) @foo3( )
ret void
bb10: ; preds = %entry
- call void (...)* @foo4( )
+ call void (...) @foo4( )
ret void
bb11: ; preds = %entry
- call void (...)* @foo5( )
+ call void (...) @foo5( )
ret void
bb12: ; preds = %entry
- call void (...)* @foo6( )
+ call void (...) @foo6( )
ret void
}
diff --git a/test/CodeGen/X86/2007-02-16-BranchFold.ll b/test/CodeGen/X86/2007-02-16-BranchFold.ll
index 596021a..22e0a4e 100644
--- a/test/CodeGen/X86/2007-02-16-BranchFold.ll
+++ b/test/CodeGen/X86/2007-02-16-BranchFold.ll
@@ -60,7 +60,7 @@ bb.i9.i.i932.ce: ; preds = %newFuncRoot
%tmp1.i6.i = getelementptr %struct.operator, %struct.operator* %tmp66.i62.i, i32 0, i32 2 ; <i32*> [#uses=1]
%tmp2.i7.i = load i32, i32* %tmp1.i6.i ; <i32> [#uses=1]
%tmp3.i8.i = load %struct.FILE*, %struct.FILE** @outfile ; <%struct.FILE*> [#uses=1]
- %tmp5.i9.i = call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %tmp3.i8.i, i8* getelementptr ([11 x i8], [11 x i8]* @str1, i32 0, i32 0), i32 %tmp2.i7.i ) ; <i32> [#uses=0]
+ %tmp5.i9.i = call i32 (%struct.FILE*, i8*, ...) @fprintf( %struct.FILE* %tmp3.i8.i, i8* getelementptr ([11 x i8], [11 x i8]* @str1, i32 0, i32 0), i32 %tmp2.i7.i ) ; <i32> [#uses=0]
%tmp7.i10.i = getelementptr %struct.operator, %struct.operator* %tmp66.i62.i, i32 0, i32 5 ; <i32*> [#uses=1]
%tmp8.i11.i = load i32, i32* %tmp7.i10.i ; <i32> [#uses=7]
br label %NodeBlock5
diff --git a/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
index 5d2c01a..a9b85b9 100644
--- a/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
@@ -7,7 +7,7 @@
define void @__eprintf(i8* %string, i8* %expression, i32 %line, i8* %filename) {
%tmp = load %struct._IO_FILE*, %struct._IO_FILE** @stderr
- %tmp5 = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp, i8* %string, i8* %expression, i32 %line, i8* %filename )
+ %tmp5 = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf( %struct._IO_FILE* %tmp, i8* %string, i8* %expression, i32 %line, i8* %filename )
%tmp6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr
%tmp7 = tail call i32 @fflush( %struct._IO_FILE* %tmp6 )
tail call void @abort( )
diff --git a/test/CodeGen/X86/2007-05-05-VecCastExpand.ll b/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
index e6eaa57..0edf139 100644
--- a/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
+++ b/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
@@ -6,7 +6,7 @@
define void @test() {
bb.i:
%tmp.i660 = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=1]
- call void (i32, ...)* @printf( i32 0, i8* getelementptr ([18 x i8], [18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 )
+ call void (i32, ...) @printf( i32 0, i8* getelementptr ([18 x i8], [18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 )
%tmp152.i = load <4 x i32>, <4 x i32>* null ; <<4 x i32>> [#uses=1]
%tmp156.i = bitcast <4 x i32> %tmp152.i to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp175.i = bitcast <4 x float> %tmp.i660 to <4 x i32> ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
index ecc5835..9ce5f5a 100644
--- a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
@@ -19,7 +19,7 @@ cond_true109: ; preds = %entry
cond_next164: ; preds = %cond_true109
%tmp176 = call signext i16 @GetParamDesc( %struct.XDesc* null, i32 1701999219, i32 1413830740, %struct.XDesc* null )
- call void (i64, i8*, ...)* @r_raise( i64 0, i8* null )
+ call void (i64, i8*, ...) @r_raise( i64 0, i8* null )
unreachable
cond_true239: ; preds = %cond_true109
diff --git a/test/CodeGen/X86/2007-07-10-StackerAssert.ll b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
index b19f445..c8660f7 100644
--- a/test/CodeGen/X86/2007-07-10-StackerAssert.ll
+++ b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
@@ -30,7 +30,7 @@ cond_true425: ; preds = %bb383
%tmp432 = fsub float %tmp430, %tmp408 ; <float> [#uses=1]
%tmp432433 = fpext float %tmp432 to double ; <double> [#uses=1]
%tmp434435 = fpext float %tmp408 to double ; <double> [#uses=1]
- call void (i8*, ...)* @PR_LogPrint( i8* getelementptr ([56 x i8], [56 x i8]* @.str97, i32 0, i32 0), double 0.000000e+00, double %tmp434435, double %tmp432433 )
+ call void (i8*, ...) @PR_LogPrint( i8* getelementptr ([56 x i8], [56 x i8]* @.str97, i32 0, i32 0), double 0.000000e+00, double %tmp434435, double %tmp432433 )
ret i32 0
cond_next443: ; preds = %bb383
diff --git a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
index f2ae922..c6eb6f0 100644
--- a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
@@ -362,7 +362,7 @@ bb1159: ; preds = %cond_next1150
cond_true1169: ; preds = %bb1159
%tmp11741175 = trunc i64 %lsum.11225.0 to i32 ; <i32> [#uses=1]
- %tmp1178 = tail call i32 (%struct._IO_FILE* , i8* , ...)* @fprintf( %struct._IO_FILE* noalias %file , i8* getelementptr ([49 x i8], [49 x i8]* @.str32, i32 0, i64 0) , i32 %tmp11741175, i32 0 ) ; <i32> [#uses=0]
+ %tmp1178 = tail call i32 (%struct._IO_FILE* , i8* , ...) @fprintf( %struct._IO_FILE* noalias %file , i8* getelementptr ([49 x i8], [49 x i8]* @.str32, i32 0, i64 0) , i32 %tmp11741175, i32 0 ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %bb1159
diff --git a/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll b/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
index 019c442..a20fb47 100644
--- a/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
+++ b/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
@@ -30,7 +30,7 @@ bb37: ; preds = %bb37.loopexit, %entry
%hash.0.reg2mem.1 = phi i32 [ %phitmp, %bb37.loopexit ], [ 0, %entry ] ; <i32> [#uses=1]
store i32 %hash.0.reg2mem.1, i32* null, align 8
%tmp75 = tail call i32 null( %struct.dentry* %dir, %struct.qstr* %name ) ; <i32> [#uses=0]
- %tmp84 = tail call i32 (...)* @d_lookup( %struct.dentry* %dir, %struct.qstr* %name ) ; <i32> [#uses=0]
+ %tmp84 = tail call i32 (...) @d_lookup( %struct.dentry* %dir, %struct.qstr* %name ) ; <i32> [#uses=0]
ret %struct.dentry* null
}
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index efb87f2..ef69bd0 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -213,7 +213,7 @@ bb456: ; preds = %bb448, %bb425, %bb417, %bb395, %bb385, %bb371
%tmp460461 = fpext float %iftmp.7.0 to double ; <double> [#uses=1]
%tmp462463 = fpext float %iftmp.14.0 to double ; <double> [#uses=1]
%tmp464465 = fpext float %iftmp.0.0 to double ; <double> [#uses=1]
- %tmp467 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([48 x i8], [48 x i8]* @.str, i32 0, i32 0), double %tmp464465, double %tmp462463, double %tmp460461, double %tmp458459 ) nounwind ; <i32> [#uses=0]
+ %tmp467 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([48 x i8], [48 x i8]* @.str, i32 0, i32 0), double %tmp464465, double %tmp462463, double %tmp460461, double %tmp458459 ) nounwind ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/2008-04-09-BranchFolding.ll b/test/CodeGen/X86/2008-04-09-BranchFolding.ll
index a758fed..f21a6f3 100644
--- a/test/CodeGen/X86/2008-04-09-BranchFolding.ll
+++ b/test/CodeGen/X86/2008-04-09-BranchFolding.ll
@@ -39,7 +39,7 @@ bb226.i: ; preds = %bb73.i
bb273.i: ; preds = %bb226.i
ret %struct.tree_node* null
bb260: ; preds = %bb226.i
- tail call void (i8*, i32, ...)* @pedwarn_with_file_and_line( i8* %file.0, i32 %line.0, i8* null ) nounwind
+ tail call void (i8*, i32, ...) @pedwarn_with_file_and_line( i8* %file.0, i32 %line.0, i8* null ) nounwind
ret %struct.tree_node* null
bb344: ; preds = %bb174
ret %struct.tree_node* null
diff --git a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
index f83c990..b526591 100644
--- a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
+++ b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
@@ -43,7 +43,7 @@ entry:
%tmp105 = load %struct.NSArray*, %struct.NSArray** null, align 8 ; <%struct.NSArray*> [#uses=1]
%tmp107 = load %struct.NSObject*, %struct.NSObject** null, align 8 ; <%struct.NSObject*> [#uses=1]
call void null( %struct.NSObject* %tmp107, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_228", %struct.NSArray* %tmp105, i8 signext 0 )
- %tmp111 = call %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)* @objc_msgSend( %struct.NSObject* null, %struct.objc_selector* null, i32 0, i8* null ) ; <%struct.NSObject*> [#uses=0]
+ %tmp111 = call %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...) @objc_msgSend( %struct.NSObject* null, %struct.objc_selector* null, i32 0, i8* null ) ; <%struct.NSObject*> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
index df5ceb0..0669a32 100644
--- a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
+++ b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
@@ -64,7 +64,7 @@ entry:
br i1 %toBool, label %bb, label %bb27
bb: ; preds = %entry
- call void (...)* @abort( ) noreturn nounwind
+ call void (...) @abort( ) noreturn nounwind
unreachable
bb27: ; preds = %entry
@@ -77,7 +77,7 @@ bb27: ; preds = %entry
br i1 %toBool33, label %bb34, label %bb35
bb34: ; preds = %bb27
- call void (...)* @abort( ) noreturn nounwind
+ call void (...) @abort( ) noreturn nounwind
unreachable
bb35: ; preds = %bb27
@@ -98,7 +98,7 @@ bb35: ; preds = %bb27
br i1 %toBool49, label %bb50, label %bb51
bb50: ; preds = %bb35
- call void (...)* @abort( ) noreturn nounwind
+ call void (...) @abort( ) noreturn nounwind
unreachable
bb51: ; preds = %bb35
@@ -119,7 +119,7 @@ bb51: ; preds = %bb35
br i1 %toBool65, label %bb66, label %bb67
bb66: ; preds = %bb51
- call void (...)* @abort( ) noreturn nounwind
+ call void (...) @abort( ) noreturn nounwind
unreachable
bb67: ; preds = %bb51
@@ -132,7 +132,7 @@ bb67: ; preds = %bb51
br i1 %toBool73, label %bb74, label %bb75
bb74: ; preds = %bb67
- call void (...)* @abort( ) noreturn nounwind
+ call void (...) @abort( ) noreturn nounwind
unreachable
bb75: ; preds = %bb67
diff --git a/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll b/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
index 42752eb..a1b9d9d 100644
--- a/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
@@ -26,7 +26,7 @@ bb31: ; preds = %bb6
br label %bb33
bb33: ; preds = %bb31, %bb
- tail call void (%struct.SV*, i8*, ...)* @Perl_sv_catpvf( %struct.SV* %dsv, i8* getelementptr ([8 x i8], [8 x i8]* @"\01LC25", i32 0, i64 0), i64 %0 ) nounwind
+ tail call void (%struct.SV*, i8*, ...) @Perl_sv_catpvf( %struct.SV* %dsv, i8* getelementptr ([8 x i8], [8 x i8]* @"\01LC25", i32 0, i64 0), i64 %0 ) nounwind
unreachable
bb40: ; preds = %entry
diff --git a/test/CodeGen/X86/2008-08-06-CmpStride.ll b/test/CodeGen/X86/2008-08-06-CmpStride.ll
index 3a74b48..a030fbe 100644
--- a/test/CodeGen/X86/2008-08-06-CmpStride.ll
+++ b/test/CodeGen/X86/2008-08-06-CmpStride.ll
@@ -13,7 +13,7 @@ forbody:
%sub14 = sub i32 1027, %i.0 ; <i32> [#uses=1]
%mul15 = mul i32 %sub14, 10 ; <i32> [#uses=1]
%add166 = or i32 %mul15, 1 ; <i32> [#uses=1] *
- call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
+ call i32 (i8*, ...) @printf( i8* noalias getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
%inc = add i32 %i.0, 1 ; <i32> [#uses=3]
%cmp = icmp ne i32 %inc, 1027 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
index d939207..2910902 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
@@ -37,7 +37,7 @@ if.then: ; preds = %entry
ret i32 0
if.end: ; preds = %entry
- %call = tail call i32 (...)* @_Unwind_ForcedUnwind_Phase2() nounwind
+ %call = tail call i32 (...) @_Unwind_ForcedUnwind_Phase2() nounwind
store i32 %call, i32* @a, align 4
%tobool1 = icmp eq i32 %call, 0
br i1 %tobool1, label %cond.end, label %cond.true
diff --git a/test/CodeGen/X86/2008-09-09-LinearScanBug.ll b/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
index c80fbdd..9a1a3dd 100644
--- a/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
+++ b/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
@@ -58,7 +58,7 @@ ifend.i: ; preds = %lor_rhs.i
safe_mod_int16_t_s_s.exit: ; preds = %ifend.i, %lor_rhs.i, %func_106.exit27
%call31 = phi i16 [ %conv8.i, %ifend.i ], [ %conv, %func_106.exit27 ], [ %conv, %lor_rhs.i ] ; <i16> [#uses=1]
%conv4 = sext i16 %call31 to i32 ; <i32> [#uses=1]
- %call5 = tail call i32 (...)* @func_104( i32 %conv4 ) ; <i32> [#uses=0]
+ %call5 = tail call i32 (...) @func_104( i32 %conv4 ) ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
index 635194f..8c46bb3 100644
--- a/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
@@ -9,7 +9,7 @@ entry:
%1 = load i16, i16* @g_15, align 2 ; <i16> [#uses=1]
%2 = zext i16 %1 to i32 ; <i32> [#uses=1]
%3 = and i32 %2, 1 ; <i32> [#uses=1]
- %4 = tail call i32 (...)* @rshift_u_s( i32 1 ) nounwind ; <i32> [#uses=1]
+ %4 = tail call i32 (...) @rshift_u_s( i32 1 ) nounwind ; <i32> [#uses=1]
%5 = icmp slt i32 %4, 2 ; <i1> [#uses=1]
%6 = zext i1 %5 to i32 ; <i32> [#uses=1]
%7 = icmp sge i32 %3, %6 ; <i1> [#uses=1]
@@ -17,7 +17,7 @@ entry:
%9 = load i16, i16* @g_15, align 2 ; <i16> [#uses=1]
%10 = icmp eq i16 %9, 0 ; <i1> [#uses=1]
%11 = zext i1 %10 to i32 ; <i32> [#uses=1]
- %12 = tail call i32 (...)* @func_20( i32 1 ) nounwind ; <i32> [#uses=1]
+ %12 = tail call i32 (...) @func_20( i32 1 ) nounwind ; <i32> [#uses=1]
%13 = icmp sge i32 %11, %12 ; <i1> [#uses=1]
%14 = zext i1 %13 to i32 ; <i32> [#uses=1]
%15 = sub i32 %8, %14 ; <i32> [#uses=1]
@@ -27,7 +27,7 @@ entry:
%or.cond = or i1 false, %18 ; <i1> [#uses=1]
%19 = select i1 %or.cond, i32 0, i32 %0 ; <i32> [#uses=1]
%.0 = lshr i32 %17, %19 ; <i32> [#uses=1]
- %20 = tail call i32 (...)* @func_7( i32 %.0 ) nounwind ; <i32> [#uses=0]
+ %20 = tail call i32 (...) @func_7( i32 %.0 ) nounwind ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
index 92eb1c8..757dff4 100644
--- a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
@@ -38,7 +38,7 @@ bb12: ; preds = %bb11, %entry
%.014.in = phi i8 [ %10, %bb11 ], [ %7, %entry ] ; <i8> [#uses=1]
%11 = icmp ne i8 %.014.in, 0 ; <i1> [#uses=1]
%12 = zext i1 %11 to i32 ; <i32> [#uses=1]
- %13 = tail call i32 (...)* @func_48( i32 %12, i32 %3, i32 0 ) nounwind ; <i32> [#uses=0]
+ %13 = tail call i32 (...) @func_48( i32 %12, i32 %3, i32 0 ) nounwind ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/2008-10-11-CallCrash.ll b/test/CodeGen/X86/2008-10-11-CallCrash.ll
index a859bc6..9ad7ab2 100644
--- a/test/CodeGen/X86/2008-10-11-CallCrash.ll
+++ b/test/CodeGen/X86/2008-10-11-CallCrash.ll
@@ -6,13 +6,13 @@ target triple = "i386-apple-darwin7"
define i32 @func_45(i64 %p_46, i32 %p_48) nounwind {
entry:
- %0 = tail call i32 (...)* @lshift_s_u(i64 %p_46, i64 0) nounwind ; <i32> [#uses=0]
+ %0 = tail call i32 (...) @lshift_s_u(i64 %p_46, i64 0) nounwind ; <i32> [#uses=0]
%1 = load i32, i32* @g_385, align 4 ; <i32> [#uses=1]
%2 = shl i32 %1, 1 ; <i32> [#uses=1]
%3 = and i32 %2, 32 ; <i32> [#uses=1]
- %4 = tail call i32 (...)* @func_87(i32 undef, i32 %p_48, i32 1) nounwind ; <i32> [#uses=1]
+ %4 = tail call i32 (...) @func_87(i32 undef, i32 %p_48, i32 1) nounwind ; <i32> [#uses=1]
%5 = add i32 %3, %4 ; <i32> [#uses=1]
- %6 = tail call i32 (...)* @div_rhs(i32 %5) nounwind ; <i32> [#uses=0]
+ %6 = tail call i32 (...) @div_rhs(i32 %5) nounwind ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/2008-10-13-CoalescerBug.ll b/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
index 4d3f8c2..c285ae4 100644
--- a/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
@@ -3,7 +3,7 @@
define i32 @func_77(i8 zeroext %p_79) nounwind {
entry:
- %0 = tail call i32 (...)* @func_43(i32 1) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @func_43(i32 1) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %bb3, label %bb
@@ -14,7 +14,7 @@ bb3: ; preds = %bb, %entry
%p_79_addr.0 = phi i8 [ 0, %bb ], [ %p_79, %entry ] ; <i8> [#uses=1]
%2 = zext i8 %p_79_addr.0 to i32 ; <i32> [#uses=2]
%3 = zext i1 false to i32 ; <i32> [#uses=2]
- %4 = tail call i32 (...)* @rshift_u_s(i32 1) nounwind ; <i32> [#uses=0]
+ %4 = tail call i32 (...) @rshift_u_s(i32 1) nounwind ; <i32> [#uses=0]
%5 = lshr i32 %2, %2 ; <i32> [#uses=3]
%6 = icmp eq i32 0, 0 ; <i1> [#uses=1]
br i1 %6, label %bb6, label %bb9
diff --git a/test/CodeGen/X86/2008-11-06-testb.ll b/test/CodeGen/X86/2008-11-06-testb.ll
index 4ee4b4a..c8fad06 100644
--- a/test/CodeGen/X86/2008-11-06-testb.ll
+++ b/test/CodeGen/X86/2008-11-06-testb.ll
@@ -18,7 +18,7 @@ entry:
br i1 %4, label %bb5, label %bb
bb: ; preds = %entry
- %5 = tail call i32 (...)* @xx() nounwind ; <i32> [#uses=1]
+ %5 = tail call i32 (...) @xx() nounwind ; <i32> [#uses=1]
ret i32 %5
bb5: ; preds = %entry
diff --git a/test/CodeGen/X86/2008-11-29-ULT-Sign.ll b/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
index 6dca141..03442d6 100644
--- a/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
+++ b/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
@@ -8,7 +8,7 @@ entry:
br i1 %cmp, label %if.end, label %if.then
if.then: ; preds = %entry
- %call = call i32 (...)* @b() ; <i32> [#uses=0]
+ %call = call i32 (...) @b() ; <i32> [#uses=0]
br label %if.end
if.end: ; preds = %if.then, %entry
diff --git a/test/CodeGen/X86/2008-12-01-SpillerAssert.ll b/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
index 105489e..cf292e3 100644
--- a/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
+++ b/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
@@ -10,6 +10,6 @@ declare i32 @printk(i8*, ...)
define void @display_cacheinfo(%struct.cpuinfo_x86* %c) nounwind section ".cpuinit.text" {
entry:
%asmtmp = tail call { i32, i32, i32, i32 } asm "cpuid", "={ax},={bx},={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 -2147483643, i32 0) nounwind ; <{ i32, i32, i32, i32 }> [#uses=0]
- %0 = tail call i32 (i8*, ...)* @printk(i8* getelementptr ([70 x i8], [70 x i8]* @.str10, i32 0, i64 0), i32 0, i32 0, i32 0, i32 0) nounwind ; <i32> [#uses=0]
+ %0 = tail call i32 (i8*, ...) @printk(i8* getelementptr ([70 x i8], [70 x i8]* @.str10, i32 0, i64 0), i32 0, i32 0, i32 0, i32 0) nounwind ; <i32> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index 7ac2cd2..6bb29fd 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -16,7 +16,7 @@ entry:
%1 = trunc i64 %u to i32 ; <i32> [#uses=4]
%2 = lshr i64 %u, 32 ; <i64> [#uses=1]
%3 = trunc i64 %2 to i32 ; <i32> [#uses=2]
- %4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8], [7 x i8]* @"\01LC", i32 0, i32 0), i32 %1) nounwind ; <i32> [#uses=0]
+ %4 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([7 x i8], [7 x i8]* @"\01LC", i32 0, i32 0), i32 %1) nounwind ; <i32> [#uses=0]
%5 = icmp ult i32 %1, %0 ; <i1> [#uses=1]
br i1 %5, label %bb2, label %bb
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index db31333..172a00a 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "7 machine-licm"
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "9 machine-licm"
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s
; rdar://6627786
; rdar://7792037
@@ -21,9 +21,9 @@ bb4: ; preds = %bb.i, %bb26, %bb4, %entry
; CHECK: xorl
; CHECK: movq
- %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0]
+ %0 = call i32 (...) @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0]
%ins = or i64 %p, 2097152 ; <i64> [#uses=1]
- %1 = call i32 (...)* @xxCalculateMidType(%struct.Key* %desc, i32 0) nounwind ; <i32> [#uses=1]
+ %1 = call i32 (...) @xxCalculateMidType(%struct.Key* %desc, i32 0) nounwind ; <i32> [#uses=1]
%cond = icmp eq i32 %1, 1 ; <i1> [#uses=1]
br i1 %cond, label %bb26, label %bb4
diff --git a/test/CodeGen/X86/2009-03-25-TestBug.ll b/test/CodeGen/X86/2009-03-25-TestBug.ll
index 79c0863..367a6d2 100644
--- a/test/CodeGen/X86/2009-03-25-TestBug.ll
+++ b/test/CodeGen/X86/2009-03-25-TestBug.ll
@@ -15,11 +15,11 @@ bb1579.i.i: ; preds = %bb1514.i.i, %bb191.i.i
br i1 %tmp178, label %hello, label %world
hello:
- %h = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8], [7 x i8]* @hello, i32 0, i32 0))
+ %h = tail call i32 (i8*, ...) @printf( i8* getelementptr ([7 x i8], [7 x i8]* @hello, i32 0, i32 0))
ret void
world:
- %w = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8], [7 x i8]* @world, i32 0, i32 0))
+ %w = tail call i32 (i8*, ...) @printf( i8* getelementptr ([7 x i8], [7 x i8]* @world, i32 0, i32 0))
ret void
}
diff --git a/test/CodeGen/X86/2009-04-13-2AddrAssert.ll b/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
index 4362ba4..a3607c6 100644
--- a/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
+++ b/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-undermydesk-freebsd8.0"
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
- %call = tail call i32 (...)* @getpid() ; <i32> [#uses=1]
+ %call = tail call i32 (...) @getpid() ; <i32> [#uses=1]
%conv = trunc i32 %call to i16 ; <i16> [#uses=1]
%0 = tail call i16 asm "xchgb ${0:h}, ${0:b}","=Q,0,~{dirflag},~{fpsr},~{flags}"(i16 %conv) nounwind ; <i16> [#uses=0]
ret i32 undef
diff --git a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
index 1e5e933..8055ea8 100644
--- a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
+++ b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
@@ -21,7 +21,7 @@ entry:
store i8 %5, i8* %7, align 1
%8 = getelementptr %struct.X, %struct.X* %xxx, i32 0, i32 0 ; <i8*> [#uses=1]
store i8 15, i8* %8, align 1
- %9 = call i32 (...)* bitcast (i32 (%struct.X*, %struct.X*)* @f to i32 (...)*)(%struct.X* byval align 4 %xxx, %struct.X* byval align 4 %xxx) nounwind ; <i32> [#uses=1]
+ %9 = call i32 (...) bitcast (i32 (%struct.X*, %struct.X*)* @f to i32 (...)*)(%struct.X* byval align 4 %xxx, %struct.X* byval align 4 %xxx) nounwind ; <i32> [#uses=1]
store i32 %9, i32* %0, align 4
%10 = load i32, i32* %0, align 4 ; <i32> [#uses=1]
store i32 %10, i32* %retval, align 4
diff --git a/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll b/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
index 6e062fb..89cd24d 100644
--- a/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
+++ b/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
@@ -7,7 +7,7 @@ entry:
%tmp5.i = extractelement <1 x i64> %a, i32 0
%tmp11 = bitcast i64 %tmp5.i to <1 x i64>
%tmp8 = extractelement <1 x i64> %tmp11, i32 0
- %call6 = call i32 (i64)* @foo(i64 %tmp8)
+ %call6 = call i32 (i64) @foo(i64 %tmp8)
ret i32 undef
}
diff --git a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
index fac6a66..45e770f 100644
--- a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
+++ b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
@@ -53,7 +53,7 @@ bb5: ; preds = %bb4, %bb3
bb6.preheader: ; preds = %bb5
%21 = sext i8 %p_52 to i32 ; <i32> [#uses=1]
%22 = load volatile i32, i32* @uint8, align 4 ; <i32> [#uses=0]
- %23 = tail call i32 (...)* @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
+ %23 = tail call i32 (...) @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
unreachable
return: ; preds = %bb5
diff --git a/test/CodeGen/X86/2009-10-16-Scope.ll b/test/CodeGen/X86/2009-10-16-Scope.ll
index c783ee9..374c696 100644
--- a/test/CodeGen/X86/2009-10-16-Scope.ll
+++ b/test/CodeGen/X86/2009-10-16-Scope.ll
@@ -9,7 +9,7 @@ entry:
br label %do.body, !dbg !0
do.body: ; preds = %entry
- call void @llvm.dbg.declare(metadata i32* %count_, metadata !4, metadata !MDExpression())
+ call void @llvm.dbg.declare(metadata i32* %count_, metadata !4, metadata !MDExpression()), !dbg !MDLocation(scope: !5)
%conv = ptrtoint i32* %count_ to i32, !dbg !0 ; <i32> [#uses=1]
%call = call i32 @foo(i32 %conv) ssp, !dbg !0 ; <i32> [#uses=0]
br label %do.end, !dbg !0
diff --git a/test/CodeGen/X86/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll
index e0fd9b0..b03556a 100644
--- a/test/CodeGen/X86/2010-01-18-DbgValue.ll
+++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll
@@ -51,5 +51,5 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!17 = distinct !MDLexicalBlock(line: 11, column: 0, file: !19, scope: !1)
!18 = !{!1}
!19 = !MDFile(filename: "b2.c", directory: "/tmp/")
-!20 = !{i32 0}
+!20 = !{}
!21 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
index ced3708..7c0fd68 100644
--- a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
+++ b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
@@ -8,7 +8,7 @@
define i32 @"main(tart.core.String[])->int32"(i32 %args) {
entry:
- tail call void @llvm.dbg.value(metadata %tart.reflect.ComplexType* @.type.SwitchStmtTest, i64 0, metadata !8, metadata !MDExpression())
+ tail call void @llvm.dbg.value(metadata %tart.reflect.ComplexType* @.type.SwitchStmtTest, i64 0, metadata !8, metadata !MDExpression()), !dbg !MDLocation(scope: !9)
tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
ret i32 3
}
diff --git a/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll b/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll
index 4e4e006..6fe31b6 100644
--- a/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll
+++ b/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll
@@ -23,7 +23,7 @@ for.body: ; preds = %if.end40, %entry
if.then: ; preds = %for.body
%conv18 = sext i8 %tmp6 to i32 ; <i32> [#uses=1]
- %call = tail call i32 (...)* @invalid(i32 0, i32 0, i32 %conv18) nounwind ; <i32> [#uses=0]
+ %call = tail call i32 (...) @invalid(i32 0, i32 0, i32 %conv18) nounwind ; <i32> [#uses=0]
br label %if.end
if.end: ; preds = %if.then, %for.body
@@ -34,7 +34,7 @@ if.end: ; preds = %if.then, %for.body
if.then36: ; preds = %if.end
%conv38 = sext i8 %tmp24 to i32 ; <i32> [#uses=1]
- %call39 = tail call i32 (...)* @invalid(i32 0, i32 0, i32 %conv38) nounwind ; <i32> [#uses=0]
+ %call39 = tail call i32 (...) @invalid(i32 0, i32 0, i32 %conv38) nounwind ; <i32> [#uses=0]
br label %if.end40
if.end40: ; preds = %if.then36, %if.end
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 43f05ca..29df291 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -10,10 +10,10 @@
define hidden %0 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone {
entry:
- tail call void @llvm.dbg.value(metadata float %a, i64 0, metadata !0, metadata !MDExpression())
- tail call void @llvm.dbg.value(metadata float %b, i64 0, metadata !11, metadata !MDExpression())
- tail call void @llvm.dbg.value(metadata float %c, i64 0, metadata !12, metadata !MDExpression())
- tail call void @llvm.dbg.value(metadata float %d, i64 0, metadata !13, metadata !MDExpression())
+ tail call void @llvm.dbg.value(metadata float %a, i64 0, metadata !0, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
+ tail call void @llvm.dbg.value(metadata float %b, i64 0, metadata !11, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
+ tail call void @llvm.dbg.value(metadata float %c, i64 0, metadata !12, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
+ tail call void @llvm.dbg.value(metadata float %d, i64 0, metadata !13, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
%0 = tail call float @fabsf(float %c) nounwind readnone, !dbg !19 ; <float> [#uses=1]
%1 = tail call float @fabsf(float %d) nounwind readnone, !dbg !19 ; <float> [#uses=1]
%2 = fcmp olt float %0, %1, !dbg !19 ; <i1> [#uses=1]
@@ -247,5 +247,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!44 = !{!1}
!45 = !MDFile(filename: "libgcc2.c", directory: "/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc")
!46 = !MDFile(filename: "libgcc2.h", directory: "/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc")
-!47 = !{i32 0}
+!47 = !{}
!48 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index b8f7ba2..fe68711 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-darwin10"
define i8* @bar(%struct.a* %myvar) nounwind optsize noinline ssp {
entry:
- tail call void @llvm.dbg.value(metadata %struct.a* %myvar, i64 0, metadata !8, metadata !MDExpression())
+ tail call void @llvm.dbg.value(metadata %struct.a* %myvar, i64 0, metadata !8, metadata !MDExpression()), !dbg !MDLocation(scope: !9)
%0 = getelementptr inbounds %struct.a, %struct.a* %myvar, i64 0, i32 0, !dbg !28 ; <i32*> [#uses=1]
%1 = load i32, i32* %0, align 8, !dbg !28 ; <i32> [#uses=1]
tail call void @foo(i32 %1) nounwind optsize noinline ssp, !dbg !28
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index f2e8dbd..097cd24 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -4,8 +4,8 @@
define i32 @foo(i32 %y) nounwind optsize ssp {
entry:
- tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !0, metadata !MDExpression())
- %0 = tail call i32 (...)* @zoo(i32 %y) nounwind, !dbg !9 ; <i32> [#uses=1]
+ tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !0, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
+ %0 = tail call i32 (...) @zoo(i32 %y) nounwind, !dbg !9 ; <i32> [#uses=1]
ret i32 %0, !dbg !9
}
@@ -15,9 +15,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
define i32 @bar(i32 %x) nounwind optsize ssp {
entry:
- tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !7, metadata !MDExpression())
- tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !0, metadata !MDExpression()) nounwind
- %0 = tail call i32 (...)* @zoo(i32 1) nounwind, !dbg !12 ; <i32> [#uses=1]
+ tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !7, metadata !MDExpression()), !dbg !MDLocation(scope: !8)
+ tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !0, metadata !MDExpression()) nounwind, !dbg !MDLocation(scope: !1)
+ %0 = tail call i32 (...) @zoo(i32 1) nounwind, !dbg !12 ; <i32> [#uses=1]
%1 = add nsw i32 %0, %x, !dbg !13 ; <i32> [#uses=1]
ret i32 %1, !dbg !13
}
@@ -44,7 +44,7 @@ entry:
!16 = !{!7}
!17 = !{!1, !8}
!18 = !MDFile(filename: "f.c", directory: "/tmp")
-!19 = !{i32 0}
+!19 = !{}
;CHECK: DEBUG_VALUE: bar:x <- E
;CHECK: Ltmp
diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
index b0185ba..942faf4 100644
--- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
+++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
@@ -10,8 +10,8 @@ target triple = "x86_64-apple-darwin10.2"
define i32 @_ZN3foo3bazEi(%struct.foo* nocapture %this, i32 %x) nounwind readnone optsize noinline ssp align 2 {
;CHECK: DEBUG_VALUE: baz:this <- RDI{{$}}
entry:
- tail call void @llvm.dbg.value(metadata %struct.foo* %this, i64 0, metadata !15, metadata !MDExpression())
- tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !16, metadata !MDExpression())
+ tail call void @llvm.dbg.value(metadata %struct.foo* %this, i64 0, metadata !15, metadata !MDExpression()), !dbg !MDLocation(scope: !8)
+ tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !16, metadata !MDExpression()), !dbg !MDLocation(scope: !8)
%0 = mul nsw i32 %x, 7, !dbg !29 ; <i32> [#uses=1]
%1 = add nsw i32 %0, 1, !dbg !29 ; <i32> [#uses=1]
ret i32 %1, !dbg !29
@@ -55,6 +55,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!29 = !MDLocation(line: 16, scope: !30)
!30 = distinct !MDLexicalBlock(line: 15, column: 0, file: !31, scope: !8)
!31 = !MDFile(filename: "foo.cp", directory: "/tmp/")
-!32 = !{i32 0}
+!32 = !{}
!33 = !{!1, !8, !18}
!34 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll b/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
index 198eb31..0b1c36f 100644
--- a/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
+++ b/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
@@ -18,7 +18,7 @@ entry:
%0 = call i32 asm "bsr $1, $0\0A\09cmovz $2, $0", "=&r,ro,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %zero, i32 -1) nounwind, !srcloc !0 ; <i32> [#uses=1]
store i32 %0, i32* %v
%tmp = load i32, i32* %v ; <i32> [#uses=1]
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0), i32 %tmp) ; <i32> [#uses=0]
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0), i32 %tmp) ; <i32> [#uses=0]
store i32 0, i32* %retval
%1 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %0
diff --git a/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
index 1a05d0a..ab9715d 100644
--- a/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
+++ b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
@@ -29,7 +29,7 @@ if.then: ; preds = %entry
if.end: ; preds = %entry.if.end_crit_edge, %if.then
%tmp4 = phi i32 [ %tmp4.pre, %entry.if.end_crit_edge ], [ 1, %if.then ] ; <i32> [#uses=1]
- %call5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %tmp4) nounwind ; <i32> [#uses=0]
+ %call5 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %tmp4) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll
index 6bd1217..d3ad860 100644
--- a/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -125,5 +125,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!44 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 26, scope: !39, file: !2, type: !13)
!45 = !MDLocation(line: 27, scope: !39)
!47 = !MDFile(filename: "small.cc", directory: "/Users/manav/R8248330")
-!48 = !{i32 0}
+!48 = !{}
!49 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
index fa4fd75..30abdc5 100644
--- a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
+++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
@@ -31,5 +31,5 @@ entry:
!13 = !{!0, !6}
!14 = !MDFile(filename: "", directory: "/private/tmp")
!15 = !MDFile(filename: "bug.c", directory: "/private/tmp")
-!16 = !{i32 0}
+!16 = !{}
!17 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
index 783b34d..d920513 100644
--- a/test/CodeGen/X86/2010-11-02-DbgParameter.ll
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -36,5 +36,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!15 = !{!0}
!16 = !{!6}
!17 = !MDFile(filename: "one.c", directory: "/private/tmp")
-!18 = !{i32 0}
+!18 = !{}
!19 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index c6a3a78..c02bd2d 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -60,7 +60,7 @@ cond.end: ; preds = %entry, %cond.true
if.then: ; preds = %cond.end
%puts = tail call i32 @puts(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @str, i64 0, i64 0))
- %call12 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str1, i64 0, i64 0), i32 %call, i32 %cond) nounwind optsize, !dbg !26
+ %call12 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str1, i64 0, i64 0), i32 %call, i32 %cond) nounwind optsize, !dbg !26
ret i32 1, !dbg !27
return: ; preds = %cond.end
@@ -110,5 +110,5 @@ declare i32 @puts(i8* nocapture) nounwind
!29 = !{!10, !11, !12}
!30 = !{!14, !17}
!31 = !MDFile(filename: "rem_small.c", directory: "/private/tmp")
-!32 = !{i32 0}
+!32 = !{}
!33 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2011-02-23-UnfoldBug.ll b/test/CodeGen/X86/2011-02-23-UnfoldBug.ll
index 900106a..90b90d7 100644
--- a/test/CodeGen/X86/2011-02-23-UnfoldBug.ll
+++ b/test/CodeGen/X86/2011-02-23-UnfoldBug.ll
@@ -22,7 +22,7 @@ for.body33.lr.ph: ; preds = %for.body
for.end: ; preds = %for.body
%vecins.i94 = insertelement <2 x double> undef, double 0.000000e+00, i32 0
%cmpsd.i = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %vecins.i94, <2 x double> <double 0x3FE984B204153B34, double 0x3FE984B204153B34>, i8 2) nounwind
- tail call void (...)* @_mm_movemask_pd(<2 x double> %cmpsd.i) nounwind
+ tail call void (...) @_mm_movemask_pd(<2 x double> %cmpsd.i) nounwind
br i1 undef, label %if.then67, label %if.end71
if.then67: ; preds = %for.end
diff --git a/test/CodeGen/X86/2011-03-02-DAGCombiner.ll b/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
index 86e579a..d25fbf7 100644
--- a/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
+++ b/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
@@ -43,7 +43,7 @@ entry:
%14 = and i32 %13, -129
%15 = or i32 %14, %12
store i32 %15, i32* %10, align 4
- %call = call i32 (...)* @iequals(i32 1841, i32 %bf.value, i32 0)
+ %call = call i32 (...) @iequals(i32 1841, i32 %bf.value, i32 0)
%16 = load i32, i32* %retval
ret i32 %16
}
diff --git a/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
index a086a79..b8e5100 100644
--- a/test/CodeGen/X86/2011-09-14-valcoalesce.ll
+++ b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -144,7 +144,7 @@ if.end117.i: ; preds = %if.then108.i, %land
br i1 undef, label %if.then122.i, label %for.cond138.preheader.i
if.then122.i: ; preds = %if.end117.i
- call void (...)* @fprintf(i32 undef, i32 %gs.0526.i, i32 %ge.1.i, i32 %aFreq.1.i, double undef) nounwind
+ call void (...) @fprintf(i32 undef, i32 %gs.0526.i, i32 %ge.1.i, i32 %aFreq.1.i, double undef) nounwind
br label %for.cond138.preheader.i
for.cond138.preheader.i: ; preds = %if.then122.i, %if.end117.i
diff --git a/test/CodeGen/X86/2011-10-12-MachineCSE.ll b/test/CodeGen/X86/2011-10-12-MachineCSE.ll
index 5018db7..341a14b 100644
--- a/test/CodeGen/X86/2011-10-12-MachineCSE.ll
+++ b/test/CodeGen/X86/2011-10-12-MachineCSE.ll
@@ -102,7 +102,7 @@ if.end: ; preds = %lor.lhs.false23
%arrayidx38 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom37
%genfun = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx38, i32 0, i32 2
%23 = load %struct.rtx_def* (%struct.rtx_def*, ...)*, %struct.rtx_def* (%struct.rtx_def*, ...)** %genfun, align 8
- %call39 = tail call %struct.rtx_def* (%struct.rtx_def*, ...)* %23(%struct.rtx_def* %r0, %struct.rtx_def* %r1, %struct.rtx_def* %c)
+ %call39 = tail call %struct.rtx_def* (%struct.rtx_def*, ...) %23(%struct.rtx_def* %r0, %struct.rtx_def* %r1, %struct.rtx_def* %c)
br label %return
return: ; preds = %if.end, %if.then
diff --git a/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
index da3c322..07dff95 100644
--- a/test/CodeGen/X86/2011-10-19-widen_vselect.ll
+++ b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -26,7 +26,7 @@ entry:
}
; CHECK-LABEL: zero_test
-; CHECK: pxor %xmm0, %xmm0
+; CHECK: xorps %xmm0, %xmm0
; CHECK: ret
define void @zero_test() {
diff --git a/test/CodeGen/X86/2012-01-12-extract-sv.ll b/test/CodeGen/X86/2012-01-12-extract-sv.ll
index 75409f2..677c902 100644
--- a/test/CodeGen/X86/2012-01-12-extract-sv.ll
+++ b/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -1,12 +1,25 @@
-; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+; RUN: llc < %s -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
-; CHECK: endless_loop
define void @endless_loop() {
+; CHECK-LABEL: endless_loop:
+; CHECK-NEXT: # BB#0:
+; CHECK-NEXT: vmovaps (%eax), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovsldup %xmm0, %xmm0 # xmm0 = xmm0[0,0,2,2]
+; CHECK-NEXT: vmovddup %xmm0, %xmm1 # xmm1 = xmm0[0,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vblendps $128, %ymm1, %ymm2, %ymm1 # ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
+; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vblendps $1, %ymm0, %ymm2, %ymm0 # ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
+; CHECK-NEXT: vmovaps %ymm0, (%eax)
+; CHECK-NEXT: vmovaps %ymm1, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
entry:
%0 = load <8 x i32>, <8 x i32> addrspace(1)* undef, align 32
%1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17>
store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
ret void
-; CHECK: ret
}
diff --git a/test/CodeGen/X86/2012-07-10-extload64.ll b/test/CodeGen/X86/2012-07-10-extload64.ll
index f33fc8c..a366102 100644
--- a/test/CodeGen/X86/2012-07-10-extload64.ll
+++ b/test/CodeGen/X86/2012-07-10-extload64.ll
@@ -6,7 +6,7 @@ entry:
; CHECK: pmovzxwd
%A27 = load <4 x i16>, <4 x i16>* %in, align 4
%A28 = add <4 x i16> %A27, %A27
-; CHECK: movlpd
+; CHECK: movq
store <4 x i16> %A28, <4 x i16>* %in, align 4
ret void
; CHECK: ret
@@ -18,7 +18,7 @@ define void @store_64(<2 x i32>* %ptr) {
BB:
store <2 x i32> zeroinitializer, <2 x i32>* %ptr
ret void
-;CHECK: movlpd
+;CHECK: movlps
;CHECK: ret
}
diff --git a/test/CodeGen/X86/2012-09-28-CGPBug.ll b/test/CodeGen/X86/2012-09-28-CGPBug.ll
index 57af20e..a8e0625 100644
--- a/test/CodeGen/X86/2012-09-28-CGPBug.ll
+++ b/test/CodeGen/X86/2012-09-28-CGPBug.ll
@@ -35,7 +35,7 @@ define void @h(i8*) nounwind ssp {
indirectbr i8* %16, [label %17, label %18]
; <label>:17 ; preds = %11
- tail call void (i8*, ...)* @g(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str40, i32 0, i32 0))
+ tail call void (i8*, ...) @g(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str40, i32 0, i32 0))
br label %22
; <label>:18 ; preds = %11
diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll
index a9b8cc6..d1c0266 100644
--- a/test/CodeGen/X86/2012-1-10-buildvector.ll
+++ b/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -1,27 +1,28 @@
-; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+; RUN: llc < %s -mattr=+avx -mtriple=i686-unknown-unknown | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
-target triple = "i686-pc-win32"
-
-;CHECK-LABEL: bad_cast:
define void @bad_cast() {
-entry:
+; CHECK-LABEL: bad_cast:
+; CHECK: # BB#0:
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, (%eax)
+; CHECK-NEXT: movl $0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%vext.i = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> <i32 0, i32 1, i32 undef>
%vecinit8.i = shufflevector <3 x i64> zeroinitializer, <3 x i64> %vext.i, <3 x i32> <i32 0, i32 3, i32 4>
store <3 x i64> %vecinit8.i, <3 x i64>* undef, align 32
-;CHECK: ret
ret void
}
-
-;CHECK-LABEL: bad_insert:
define void @bad_insert(i32 %t) {
-entry:
-;CHECK: vxorps %ymm1, %ymm1, %ymm1
-;CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; CHECK-LABEL: bad_insert:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovaps %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0
store <8 x i32> %v2, <8 x i32> addrspace(1)* undef, align 32
-;CHECK: ret
ret void
}
diff --git a/test/CodeGen/X86/2012-11-28-merge-store-alias.ll b/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
index df4f028..ed1daad 100644
--- a/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
+++ b/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
@@ -2,7 +2,7 @@
; CHECK: merge_stores_can
; CHECK: callq foo
-; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK: xorps %xmm0, %xmm0
; CHECK-NEXT: movups %xmm0
; CHECK: callq foo
; CHECK: ret
diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
index 84e77a8..d175fab 100644
--- a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
@@ -16,7 +16,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
define signext i16 @subdivp(%struct.node.0.27* nocapture %p, double %dsq, double %tolsq, %struct.hgstruct.2.29* nocapture byval align 8 %hg) nounwind uwtable readonly ssp {
entry:
- call void @llvm.dbg.declare(metadata %struct.hgstruct.2.29* %hg, metadata !4, metadata !MDExpression())
+ call void @llvm.dbg.declare(metadata %struct.hgstruct.2.29* %hg, metadata !4, metadata !MDExpression()), !dbg !MDLocation(scope: !14)
%type = getelementptr inbounds %struct.node.0.27, %struct.node.0.27* %p, i64 0, i32 0
%0 = load i16, i16* %type, align 2
%cmp = icmp eq i16 %0, 1
diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
index b7124c9..08ade9c 100644
--- a/test/CodeGen/X86/2012-11-30-misched-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
@@ -43,14 +43,14 @@ if.then3344:
br label %if.then4073
if.then4073: ; preds = %if.then3344
- call void @llvm.dbg.declare(metadata [20 x i8]* %num14075, metadata !4, metadata !MDExpression())
+ call void @llvm.dbg.declare(metadata [20 x i8]* %num14075, metadata !4, metadata !MDExpression()), !dbg !MDLocation(scope: !5)
%arraydecay4078 = getelementptr inbounds [20 x i8], [20 x i8]* %num14075, i64 0, i64 0
%0 = load i32, i32* undef, align 4
%add4093 = add nsw i32 %0, 0
%conv4094 = sitofp i32 %add4093 to float
%div4095 = fdiv float %conv4094, 5.670000e+02
%conv4096 = fpext float %div4095 to double
- %call4097 = call i32 (i8*, i32, i64, i8*, ...)* @__sprintf_chk(i8* %arraydecay4078, i32 0, i64 20, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str15, i64 0, i64 0), double %conv4096) nounwind
+ %call4097 = call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* %arraydecay4078, i32 0, i64 20, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str15, i64 0, i64 0), double %conv4096) nounwind
br i1 %cmp1733, label %if.then4107, label %if.else4114
if.then4107: ; preds = %if.then4073
@@ -108,7 +108,7 @@ cond.true: ; preds = %entry
unreachable
cond.end: ; preds = %entry
- call void @llvm.dbg.declare(metadata %"class.__gnu_cxx::hash_map"* %X, metadata !31, metadata !MDExpression())
+ call void @llvm.dbg.declare(metadata %"class.__gnu_cxx::hash_map"* %X, metadata !31, metadata !MDExpression()), !dbg !MDLocation(scope: !37)
%_M_num_elements.i.i.i.i = getelementptr inbounds %"class.__gnu_cxx::hash_map", %"class.__gnu_cxx::hash_map"* %X, i64 0, i32 0, i32 5
invoke void @_Znwm()
to label %exit.i unwind label %lpad2.i.i.i.i
diff --git a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
index 5bcff57..871c68f 100644
--- a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
@@ -20,7 +20,7 @@ if.then: ; preds = %entry
unreachable
if.end: ; preds = %entry
- call void @llvm.dbg.declare(metadata %struct.btCompoundLeafCallback* %callback, metadata !3, metadata !MDExpression())
+ call void @llvm.dbg.declare(metadata %struct.btCompoundLeafCallback* %callback, metadata !3, metadata !MDExpression()), !dbg !MDLocation(scope: !2)
%m = getelementptr inbounds %struct.btCompoundLeafCallback, %struct.btCompoundLeafCallback* %callback, i64 0, i32 1
store i32 0, i32* undef, align 8
%cmp12447 = icmp sgt i32 undef, 0
diff --git a/test/CodeGen/X86/2014-08-29-CompactUnwind.ll b/test/CodeGen/X86/2014-08-29-CompactUnwind.ll
index 3d9dc57..120eba7 100644
--- a/test/CodeGen/X86/2014-08-29-CompactUnwind.ll
+++ b/test/CodeGen/X86/2014-08-29-CompactUnwind.ll
@@ -36,7 +36,7 @@ print_shadow_bytes.exit.i: ; preds = %print_shadow_bytes.exit.i, %0
%reg16 = getelementptr inbounds [3 x i8], [3 x i8]* %.str..str1.i, i64 0, i64 0
%reg17 = shl i64 %iv.i, 1
%reg19 = inttoptr i64 %reg17 to i8*
- call void (i64*, i8*, ...)* @append(i64* %str.i, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str2, i64 0, i64 0), i8* %reg16, i8* %reg19)
+ call void (i64*, i8*, ...) @append(i64* %str.i, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str2, i64 0, i64 0), i8* %reg16, i8* %reg19)
%iv.next.i = add nsw i64 %iv.i, 0
br label %print_shadow_bytes.exit.i
}
diff --git a/test/CodeGen/X86/GC/dynamic-frame-size.ll b/test/CodeGen/X86/GC/dynamic-frame-size.ll
new file mode 100644
index 0000000..a3583d4
--- /dev/null
+++ b/test/CodeGen/X86/GC/dynamic-frame-size.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare void @use(<4 x i8*>*)
+
+; Test that a frame which requires dynamic relocation produces a stack map
+; with a size of UINT64_MAX.
+define void @test(i8* %ptr) gc "erlang" {
+ ; 32 byte alignment (for the alloca) is larger than the default
+ ; 16 byte alignment
+ %slot = alloca <4 x i8*>
+ call void @use(<4 x i8*>* %slot);
+ ret void
+}
+
+; CHECK: .note.gc
+; CHECK-NEXT: .align 8
+; safe point count
+; CHECK .short 1
+; CHECK .long .Ltmp0
+; stack frame size (in words)
+; CHECK .short -1
+; stack arity (arguments on the stack)
+; CHECK .short 0
+; live root count
+; CHECK .short 0
+
diff --git a/test/CodeGen/X86/MachineSink-DbgValue.ll b/test/CodeGen/X86/MachineSink-DbgValue.ll
index 265fec4..79db445 100644
--- a/test/CodeGen/X86/MachineSink-DbgValue.ll
+++ b/test/CodeGen/X86/MachineSink-DbgValue.ll
@@ -49,5 +49,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!18 = !{!1}
!19 = !{!6, !7, !10}
!20 = !MDFile(filename: "a.c", directory: "/private/tmp")
-!21 = !{i32 0}
+!21 = !{}
!22 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/StackColoring-dbg.ll b/test/CodeGen/X86/StackColoring-dbg.ll
index da4d58a..7ac08d1 100644
--- a/test/CodeGen/X86/StackColoring-dbg.ll
+++ b/test/CodeGen/X86/StackColoring-dbg.ll
@@ -17,7 +17,7 @@ entry:
for.body:
call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind
call void @llvm.lifetime.start(i64 -1, i8* %x.i) nounwind
- call void @llvm.dbg.declare(metadata i8* %x.i, metadata !22, metadata !MDExpression()) nounwind
+ call void @llvm.dbg.declare(metadata i8* %x.i, metadata !22, metadata !MDExpression()) nounwind, !dbg !MDLocation(scope: !2)
br label %for.body
}
@@ -27,7 +27,7 @@ declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!23}
-!0 = !MDCompileUnit(language: DW_LANG_C89, producer: "clang", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2)
+!0 = !MDCompileUnit(language: DW_LANG_C89, producer: "clang", isOptimized: true, emissionKind: 0, file: !1, enums: !{}, retainedTypes: !{})
!1 = !MDFile(filename: "t.c", directory: "")
!16 = !MDBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!2 = !MDSubprogram()
diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll
index 9c24be4..44b587a 100644
--- a/test/CodeGen/X86/add-of-carry.ll
+++ b/test/CodeGen/X86/add-of-carry.ll
@@ -4,43 +4,26 @@
define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
entry:
; CHECK-LABEL: test1:
-; CHECK: cmpl %ecx, %eax
-; CHECK-NOT: addl
-; CHECK: adcl $0, %eax
- %add4 = add i32 %x, %sum
- %cmp = icmp ult i32 %add4, %x
- %inc = zext i1 %cmp to i32
- %z.0 = add i32 %add4, %inc
- ret i32 %z.0
-}
-
-; Instcombine transforms test1 into test2:
-; CHECK-LABEL: test2:
; CHECK: movl
; CHECK-NEXT: addl
; CHECK-NEXT: adcl $0
; CHECK-NEXT: ret
-define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
-entry:
- %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
- %0 = extractvalue { i32, i1 } %uadd, 0
- %cmp = extractvalue { i32, i1 } %uadd, 1
+ %add4 = add i32 %x, %sum
+ %cmp = icmp ult i32 %add4, %x
%inc = zext i1 %cmp to i32
- %z.0 = add i32 %0, %inc
+ %z.0 = add i32 %add4, %inc
ret i32 %z.0
}
; <rdar://problem/12579915>
-define i32 @test3(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
+define i32 @test2(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
entry:
%cmp = icmp ugt i32 %x, %y
%dec = sext i1 %cmp to i32
%dec.res = add nsw i32 %dec, %res
ret i32 %dec.res
-; CHECK-LABEL: test3:
+; CHECK-LABEL: test2:
; CHECK: cmpl
; CHECK: sbbl
; CHECK: ret
}
-
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll
index 360f141..3f19a06 100644
--- a/test/CodeGen/X86/aliases.ll
+++ b/test/CodeGen/X86/aliases.ll
@@ -69,7 +69,7 @@ entry:
%tmp0 = load i32, i32* @bar_i
%tmp2 = call i32 @foo_f()
%tmp3 = add i32 %tmp, %tmp2
- %tmp4 = call %FunTy* @bar_f()
+ %tmp4 = call i32 @bar_f()
%tmp5 = add i32 %tmp3, %tmp4
%tmp6 = add i32 %tmp1, %tmp5
%tmp7 = add i32 %tmp6, %tmp0
diff --git a/test/CodeGen/X86/and-or-fold.ll b/test/CodeGen/X86/and-or-fold.ll
index 836b5f1..ec39522 100644
--- a/test/CodeGen/X86/and-or-fold.ll
+++ b/test/CodeGen/X86/and-or-fold.ll
@@ -21,6 +21,6 @@ entry:
%tmp1 = and i64 %x, 123127
%tmp2 = or i64 %tmp1, 3
ret i64 %tmp2
-; DARWIN-OPT: andq $123124
+; DARWIN-OPT: andl $123124
; DARWIN-OPT-NEXT: leaq 3
}
diff --git a/test/CodeGen/X86/andimm8.ll b/test/CodeGen/X86/andimm8.ll
index 640237d..d9e676a 100644
--- a/test/CodeGen/X86/andimm8.ll
+++ b/test/CodeGen/X86/andimm8.ll
@@ -17,3 +17,15 @@ define void @foo(i64 %zed, i64* %x) nounwind {
store i64 %t2, i64* %x, align 8
ret void
}
+
+define i64 @bar(i64 %zed) nounwind {
+; CHECK: andl $42, %edi # encoding: [0x83,0xe7,0x2a]
+ %t1 = and i64 %zed, 42
+ ret i64 %t1
+}
+
+define i64 @baz(i64 %zed) nounwind {
+; CHECK: andl $2147483647, %edi # encoding: [0x81,0xe7,0xff,0xff,0xff,0x7f]
+ %t1 = and i64 %zed, 2147483647
+ ret i64 %t1
+}
diff --git a/test/CodeGen/X86/anyregcc-crash.ll b/test/CodeGen/X86/anyregcc-crash.ll
index 3abe3d1..a7c104e 100644
--- a/test/CodeGen/X86/anyregcc-crash.ll
+++ b/test/CodeGen/X86/anyregcc-crash.ll
@@ -7,7 +7,7 @@ define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6,
i64 %v7, i64 %v8, i64 %v9, i64 %v10, i64 %v11, i64 %v12,
i64 %v13, i64 %v14, i64 %v15, i64 %v16) {
entry:
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 16,
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 16,
i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6,
i64 %v7, i64 %v8, i64 %v9, i64 %v10, i64 %v11, i64 %v12,
i64 %v13, i64 %v14, i64 %v15, i64 %v16)
diff --git a/test/CodeGen/X86/anyregcc.ll b/test/CodeGen/X86/anyregcc.ll
index 98ba17c..129aadf 100644
--- a/test/CodeGen/X86/anyregcc.ll
+++ b/test/CodeGen/X86/anyregcc.ll
@@ -60,7 +60,7 @@
; CHECK-NEXT: .long 3
define i64 @test() nounwind ssp uwtable {
entry:
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 15, i8* null, i32 2, i32 1, i32 2, i64 3)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 0, i32 15, i8* null, i32 2, i32 1, i32 2, i64 3)
ret i64 0
}
@@ -82,7 +82,7 @@ entry:
define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
entry:
%f = inttoptr i64 12297829382473034410 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 15, i8* %f, i32 1, i8* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 1, i32 15, i8* %f, i32 1, i8* %obj)
ret i64 %ret
}
@@ -105,7 +105,7 @@ define i64 @property_access2() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 12297829382473034410 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %f, i32 1, i64* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %f, i32 1, i64* %obj)
ret i64 %ret
}
@@ -128,7 +128,7 @@ define i64 @property_access3() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 12297829382473034410 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 15, i8* %f, i32 0, i64* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 3, i32 15, i8* %f, i32 0, i64* %obj)
ret i64 %ret
}
@@ -210,7 +210,7 @@ entry:
define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 12297829382473034410 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 15, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 4, i32 15, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
@@ -292,7 +292,7 @@ entry:
define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 12297829382473034410 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
@@ -320,7 +320,7 @@ entry:
; CHECK-NEXT: .long 0
define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
ret i64 %result
}
@@ -360,7 +360,7 @@ entry:
define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 13, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
ret i64 %result
}
diff --git a/test/CodeGen/X86/atomic64.ll b/test/CodeGen/X86/atomic64.ll
index 11b4e68..c6b1c39 100644
--- a/test/CodeGen/X86/atomic64.ll
+++ b/test/CodeGen/X86/atomic64.ll
@@ -48,7 +48,7 @@ define void @atomic_fetch_and64() nounwind {
; X64: lock
; X64: andq $3
%t2 = atomicrmw and i64* @sc64, i64 5 acquire
-; X64: andq
+; X64: andl
; X64: lock
; X64: cmpxchgq
%t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
diff --git a/test/CodeGen/X86/avoid-loop-align.ll b/test/CodeGen/X86/avoid-loop-align.ll
index 5d00ed0..d82cf94 100644
--- a/test/CodeGen/X86/avoid-loop-align.ll
+++ b/test/CodeGen/X86/avoid-loop-align.ll
@@ -11,7 +11,7 @@
define i8* @test(i8* %Q, i32* %L) nounwind {
entry:
- %tmp = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=2]
+ %tmp = tail call i32 (...) @foo() nounwind ; <i32> [#uses=2]
%tmp1 = inttoptr i32 %tmp to i8* ; <i8*> [#uses=1]
br label %bb1
diff --git a/test/CodeGen/X86/avx-bitcast.ll b/test/CodeGen/X86/avx-bitcast.ll
index bb3e5a5..e34c20f 100644
--- a/test/CodeGen/X86/avx-bitcast.ll
+++ b/test/CodeGen/X86/avx-bitcast.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
-; CHECK: vmovsd (%
-; CHECK-NEXT: vmovq %xmm
define i64 @bitcasti64tof64() {
+; CHECK-LABEL: bitcasti64tof64:
+; CHECK: # BB#0:
+; CHECK: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vmovq %xmm0, %rax
+; CHECK-NEXT: retq
%a = load double, double* undef
%b = bitcast double %a to i64
ret i64 %b
diff --git a/test/CodeGen/X86/avx-cvt-2.ll b/test/CodeGen/X86/avx-cvt-2.ll
index 8cc7190..583c7d5 100644
--- a/test/CodeGen/X86/avx-cvt-2.ll
+++ b/test/CodeGen/X86/avx-cvt-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s
; Check that we generate vector conversion from float to narrower int types
@@ -8,8 +8,16 @@
define void @fptoui16(%f32vec_t %a, %i16vec_t *%p) {
; CHECK-LABEL: fptoui16:
-; CHECK: vcvttps2dq %ymm
-; CHECK-NOT: vcvttss2si
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vmovdqa %xmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%b = fptoui %f32vec_t %a to %i16vec_t
store %i16vec_t %b, %i16vec_t * %p
ret void
@@ -17,8 +25,16 @@ define void @fptoui16(%f32vec_t %a, %i16vec_t *%p) {
define void @fptosi16(%f32vec_t %a, %i16vec_t *%p) {
; CHECK-LABEL: fptosi16:
-; CHECK: vcvttps2dq %ymm
-; CHECK-NOT: vcvttss2si
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vmovdqa %xmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%b = fptosi %f32vec_t %a to %i16vec_t
store %i16vec_t %b, %i16vec_t * %p
ret void
@@ -26,8 +42,17 @@ define void @fptosi16(%f32vec_t %a, %i16vec_t *%p) {
define void @fptoui8(%f32vec_t %a, %i8vec_t *%p) {
; CHECK-LABEL: fptoui8:
-; CHECK: vcvttps2dq %ymm
-; CHECK-NOT: vcvttss2si
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; CHECK-NEXT: vmovq %xmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%b = fptoui %f32vec_t %a to %i8vec_t
store %i8vec_t %b, %i8vec_t * %p
ret void
@@ -35,8 +60,17 @@ define void @fptoui8(%f32vec_t %a, %i8vec_t *%p) {
define void @fptosi8(%f32vec_t %a, %i8vec_t *%p) {
; CHECK-LABEL: fptosi8:
-; CHECK: vcvttps2dq %ymm
-; CHECK-NOT: vcvttss2si
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; CHECK-NEXT: vmovq %xmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%b = fptosi %f32vec_t %a to %i8vec_t
store %i8vec_t %b, %i8vec_t * %p
ret void
diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll
index 9f154ab..6df3e53 100644
--- a/test/CodeGen/X86/avx-cvt.ll
+++ b/test/CodeGen/X86/avx-cvt.ll
@@ -1,84 +1,122 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
-; CHECK: vcvtdq2ps %ymm
define <8 x float> @sitofp00(<8 x i32> %a) nounwind {
+; CHECK-LABEL: sitofp00:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
+; CHECK-NEXT: retq
%b = sitofp <8 x i32> %a to <8 x float>
ret <8 x float> %b
}
-; CHECK: vcvttps2dq %ymm
define <8 x i32> @fptosi00(<8 x float> %a) nounwind {
+; CHECK-LABEL: fptosi00:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: retq
%b = fptosi <8 x float> %a to <8 x i32>
ret <8 x i32> %b
}
-; CHECK: vcvtdq2pd %xmm
define <4 x double> @sitofp01(<4 x i32> %a) {
+; CHECK-LABEL: sitofp01:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = sitofp <4 x i32> %a to <4 x double>
ret <4 x double> %b
}
-; CHECK: vcvtdq2ps %ymm
define <8 x float> @sitofp02(<8 x i16> %a) {
+; CHECK-LABEL: sitofp02:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
+; CHECK-NEXT: retq
%b = sitofp <8 x i16> %a to <8 x float>
ret <8 x float> %b
}
-; CHECK: vcvttpd2dqy %ymm
define <4 x i32> @fptosi01(<4 x double> %a) {
+; CHECK-LABEL: fptosi01:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttpd2dqy %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%b = fptosi <4 x double> %a to <4 x i32>
ret <4 x i32> %b
}
-; CHECK: vcvtpd2psy %ymm
-; CHECK-NEXT: vcvtpd2psy %ymm
-; CHECK-NEXT: vinsertf128 $1
define <8 x float> @fptrunc00(<8 x double> %b) nounwind {
+; CHECK-LABEL: fptrunc00:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtpd2psy %ymm0, %xmm0
+; CHECK-NEXT: vcvtpd2psy %ymm1, %xmm1
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
%a = fptrunc <8 x double> %b to <8 x float>
ret <8 x float> %a
}
-; CHECK: vcvtps2pd %xmm
define <4 x double> @fpext00(<4 x float> %b) nounwind {
+; CHECK-LABEL: fpext00:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
+; CHECK-NEXT: retq
%a = fpext <4 x float> %b to <4 x double>
ret <4 x double> %a
}
-; CHECK: vcvtsi2sdq (%
define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
-entry:
+; CHECK-LABEL: funcA:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: retq
%tmp1 = load i64, i64* %e, align 8
%conv = sitofp i64 %tmp1 to double
ret double %conv
}
-; CHECK: vcvtsi2sdl (%
define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp {
-entry:
+; CHECK-LABEL: funcB:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: retq
%tmp1 = load i32, i32* %e, align 4
%conv = sitofp i32 %tmp1 to double
ret double %conv
}
-; CHECK: vcvtsi2ssl (%
define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp {
-entry:
+; CHECK-LABEL: funcC:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: retq
%tmp1 = load i32, i32* %e, align 4
%conv = sitofp i32 %tmp1 to float
ret float %conv
}
-; CHECK: vcvtsi2ssq (%
define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
-entry:
+; CHECK-LABEL: funcD:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: retq
%tmp1 = load i64, i64* %e, align 8
%conv = sitofp i64 %tmp1 to float
ret float %conv
}
-; CHECK: vcvtss2sd
define void @fpext() nounwind uwtable {
-entry:
+; CHECK-LABEL: fpext:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: retq
%f = alloca float, align 4
%d = alloca double, align 8
%tmp = load float, float* %f, align 4
@@ -88,16 +126,20 @@ entry:
}
define double @nearbyint_f64(double %a) {
-; CHECK-LABEL: nearbyint_f64
-; CHECK: vroundsd $12
+; CHECK-LABEL: nearbyint_f64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call double @llvm.nearbyint.f64(double %a)
ret double %res
}
declare double @llvm.nearbyint.f64(double %p)
define float @floor_f32(float %a) {
-; CHECK-LABEL: floor_f32
-; CHECK: vroundss $1
+; CHECK-LABEL: floor_f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vroundss $1, %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call float @llvm.floor.f32(float %a)
ret float %res
}
diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll
index a70d45a..83585b5 100644
--- a/test/CodeGen/X86/avx-shift.ll
+++ b/test/CodeGen/X86/avx-shift.ll
@@ -1,147 +1,224 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
;;; Shift left
-; CHECK: vpslld
-; CHECK: vpslld
-define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
+define <8 x i32> @vshift00(<8 x i32> %a) {
+; CHECK-LABEL: vshift00:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpslld $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpslld $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
2>
ret <8 x i32> %s
}
-; CHECK: vpsllw
-; CHECK: vpsllw
-define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
+define <16 x i16> @vshift01(<16 x i16> %a) {
+; CHECK-LABEL: vshift01:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllw $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsllw $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
ret <16 x i16> %s
}
-; CHECK: vpsllq
-; CHECK: vpsllq
-define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
+define <4 x i64> @vshift02(<4 x i64> %a) {
+; CHECK-LABEL: vshift02:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllq $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsllq $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
ret <4 x i64> %s
}
;;; Logical Shift right
-; CHECK: vpsrld
-; CHECK: vpsrld
-define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
+define <8 x i32> @vshift03(<8 x i32> %a) {
+; CHECK-LABEL: vshift03:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrld $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsrld $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
2>
ret <8 x i32> %s
}
-; CHECK: vpsrlw
-; CHECK: vpsrlw
-define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
+define <16 x i16> @vshift04(<16 x i16> %a) {
+; CHECK-LABEL: vshift04:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
ret <16 x i16> %s
}
-; CHECK: vpsrlq
-; CHECK: vpsrlq
-define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
+define <4 x i64> @vshift05(<4 x i64> %a) {
+; CHECK-LABEL: vshift05:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrlq $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsrlq $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
ret <4 x i64> %s
}
;;; Arithmetic Shift right
-; CHECK: vpsrad
-; CHECK: vpsrad
-define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
+define <8 x i32> @vshift06(<8 x i32> %a) {
+; CHECK-LABEL: vshift06:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrad $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsrad $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
2>
ret <8 x i32> %s
}
-; CHECK: vpsraw
-; CHECK: vpsraw
-define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
+define <16 x i16> @vshift07(<16 x i16> %a) {
+; CHECK-LABEL: vshift07:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsraw $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsraw $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
ret <16 x i16> %s
}
-; CHECK: vpsrlw
-; CHECK: pand
-; CHECK: pxor
-; CHECK: psubb
-; CHECK: vpsrlw
-; CHECK: pand
-; CHECK: pxor
-; CHECK: psubb
-define <32 x i8> @vshift09(<32 x i8> %a) nounwind readnone {
+define <32 x i8> @vshift09(<32 x i8> %a) {
+; CHECK-LABEL: vshift09:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpsrlw $2, %xmm1, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; CHECK-NEXT: vpxor %xmm3, %xmm1, %xmm1
+; CHECK-NEXT: vpsubb %xmm3, %xmm1, %xmm1
+; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm0
+; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0
+; CHECK-NEXT: vpsubb %xmm3, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
%s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
ret <32 x i8> %s
}
-; CHECK: pxor
-; CHECK: pcmpgtb
-; CHECK: pcmpgtb
-define <32 x i8> @vshift10(<32 x i8> %a) nounwind readnone {
+define <32 x i8> @vshift10(<32 x i8> %a) {
+; CHECK-LABEL: vshift10:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
%s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret <32 x i8> %s
}
-; CHECK: vpsrlw
-; CHECK: pand
-; CHECK: vpsrlw
-; CHECK: pand
-define <32 x i8> @vshift11(<32 x i8> %a) nounwind readnone {
+define <32 x i8> @vshift11(<32 x i8> %a) {
+; CHECK-LABEL: vshift11:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpsrlw $2, %xmm1, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm0
+; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
%s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
ret <32 x i8> %s
}
-; CHECK: vpsllw
-; CHECK: pand
-; CHECK: vpsllw
-; CHECK: pand
-define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone {
+define <32 x i8> @vshift12(<32 x i8> %a) {
+; CHECK-LABEL: vshift12:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpsllw $2, %xmm1, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpsllw $2, %xmm0, %xmm0
+; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
%s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
ret <32 x i8> %s
}
;;; Support variable shifts
-; CHECK: _vshift08
-; CHECK: vpslld $23
-; CHECK: vextractf128 $1
-; CHECK: vpslld $23
-; CHECK: ret
-define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
+define <8 x i32> @vshift08(<8 x i32> %a) {
+; CHECK-LABEL: vshift08:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpslld $23, %xmm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vcvttps2dq %xmm1, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpslld $23, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
ret <8 x i32> %bitop
}
; PR15141
-; CHECK: _vshift13:
-; CHECK-NOT: vpsll
-; CHECK-NOT: vcvttps2dq
-; CHECK: vpmulld
define <4 x i32> @vshift13(<4 x i32> %in) {
+; CHECK-LABEL: vshift13:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: retq
%T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
ret <4 x i32> %T
}
;;; Uses shifts for sign extension
-; CHECK: _sext_v16i16
-; CHECK: vpsllw
-; CHECK: vpsraw
-; CHECK: vpsllw
-; CHECK: vpsraw
-; CHECK: vinsertf128
-define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
+define <16 x i16> @sext_v16i16(<16 x i16> %a) {
+; CHECK-LABEL: sext_v16i16:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllw $8, %xmm0, %xmm1
+; CHECK-NEXT: vpsraw $8, %xmm1, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsllw $8, %xmm0, %xmm0
+; CHECK-NEXT: vpsraw $8, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%b = trunc <16 x i16> %a to <16 x i8>
%c = sext <16 x i8> %b to <16 x i16>
ret <16 x i16> %c
}
-; CHECK: _sext_v8i32
-; CHECK: vpslld
-; CHECK: vpsrad
-; CHECK: vpslld
-; CHECK: vpsrad
-; CHECK: vinsertf128
-define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
+define <8 x i32> @sext_v8i32(<8 x i32> %a) {
+; CHECK-LABEL: sext_v8i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpslld $16, %xmm0, %xmm1
+; CHECK-NEXT: vpsrad $16, %xmm1, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpslld $16, %xmm0, %xmm0
+; CHECK-NEXT: vpsrad $16, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%b = trunc <8 x i32> %a to <8 x i16>
%c = sext <8 x i16> %b to <8 x i32>
ret <8 x i32> %c
diff --git a/test/CodeGen/X86/avx-varargs-x86_64.ll b/test/CodeGen/X86/avx-varargs-x86_64.ll
index f550733..7ce5e19 100644
--- a/test/CodeGen/X86/avx-varargs-x86_64.ll
+++ b/test/CodeGen/X86/avx-varargs-x86_64.ll
@@ -10,6 +10,6 @@ declare i32 @f(i32, ...)
define void @test1() nounwind uwtable ssp {
entry:
%0 = load <8 x float>, <8 x float>* @x, align 32
- %call = call i32 (i32, ...)* @f(i32 1, <8 x float> %0)
+ %call = call i32 (i32, ...) @f(i32 1, <8 x float> %0)
ret void
}
diff --git a/test/CodeGen/X86/avx512-fma-intrinsics.ll b/test/CodeGen/X86/avx512-fma-intrinsics.ll
index 9b82c88..9814a61 100644
--- a/test/CodeGen/X86/avx512-fma-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-fma-intrinsics.ll
@@ -1,50 +1,8 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
-define <16 x float> @test_x86_vfmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
- ; CHECK-LABEL: test_x86_vfmadd_ps_z
- ; CHECK: vfmadd213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <16 x float> @test_mask_vfmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
- ; CHECK-LABEL: test_mask_vfmadd_ps
- ; CHECK: vfmadd213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
- ret <16 x float> %res
-}
-
-define <8 x double> @test_x86_vfmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
- ; CHECK-LABEL: test_x86_vfmadd_pd_z
- ; CHECK: vfmadd213pd %zmm
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
- ret <8 x double> %res
-}
-
-define <8 x double> @test_mask_fmadd_pd(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
-; CHECK-LABEL: test_mask_fmadd_pd:
-; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
- ret <8 x double> %res
-}
-
+declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
-
-define <16 x float> @test_x86_vfmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
- ; CHECK-LABEL: test_x86_vfmsubps_z
- ; CHECK: vfmsub213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <16 x float> @test_mask_vfmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
- ; CHECK-LABEL: test_mask_vfmsub_ps
- ; CHECK: vfmsub213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
- ret <16 x float> %res
-}
+declare <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmsubpd_z
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index 46581f7..07d984a 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -515,14 +515,6 @@ define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
}
declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-define <8 x i64> @test_vpmuludq(<16 x i32> %a0, <16 x i32> %a1) {
- ; CHECK: vpmuludq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
- %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a0, <16 x i32> %a1,
- <8 x i64>zeroinitializer, i8 -1)
- ret <8 x i64> %res
-}
-declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
-
define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
%res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
@@ -1606,3 +1598,568 @@ define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8
<8 x double> zeroinitializer, i8 %mask, i32 3)
ret <8 x double> %res
}
+
+define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_xor_epi32
+ ;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32
+ ;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_or_epi32
+ ;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32
+ ;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_and_epi32
+ ;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32
+ ;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
+ ;CHECK-LABEL: test_xor_epi64
+ ;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi64
+ ;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
+ ;CHECK-LABEL: test_or_epi64
+ ;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi64
+ ;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
+ ;CHECK-LABEL: test_and_epi64
+ ;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi64
+ ;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+
+define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_mask_add_epi32_rr
+ ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rrk
+ ;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rrkz
+ ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi32_rm
+ ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmk
+ ;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmkz
+ ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmb
+ ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmbk
+ ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmbkz
+ ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rr
+ ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rrk
+ ;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rrkz
+ ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rm
+ ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmk
+ ;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmkz
+ ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmb
+ ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmbk
+ ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmbkz
+ ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
+ ;CHECK-LABEL: test_mask_add_epi64_rr
+ ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi64_rrk
+ ;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi64_rrkz
+ ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi64_rm
+ ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi64_rmk
+ ;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi64_rmkz
+ ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi64_rmb
+ ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi64_rmbk
+ ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi64_rmbkz
+ ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rr
+ ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rrk
+ ;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rrkz
+ ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rm
+ ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rmk
+ ;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rmkz
+ ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rmb
+ ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rmbk
+ ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rmbkz
+ ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rr
+ ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rrk
+ ;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rrkz
+ ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rm
+ ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmk
+ ;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmkz
+ ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmb
+ ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %b = bitcast <8 x i64> %b64 to <16 x i32>
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmbk
+ ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %b = bitcast <8 x i64> %b64 to <16 x i32>
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmbkz
+ ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %b = bitcast <8 x i64> %b64 to <16 x i32>
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
+
+define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rr
+ ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rrk
+ ;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rrkz
+ ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rm
+ ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmk
+ ;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmkz
+ ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmb
+ ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %b = bitcast <8 x i64> %b64 to <16 x i32>
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmbk
+ ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %b = bitcast <8 x i64> %b64 to <16 x i32>
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmbkz
+ ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %b = bitcast <8 x i64> %b64 to <16 x i32>
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index f1ef9ef..8b13e96 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -260,7 +260,7 @@ entry:
%and = and i64 %x, 2147483647
ret i64 %and
; CHECK-LABEL: bzhi64_small_constant_mask:
-; CHECK: andq $2147483647, %r[[ARG1]]
+; CHECK: andl $2147483647, %e[[ARG1]]
}
define i32 @blsi32(i32 %x) nounwind readnone {
diff --git a/test/CodeGen/X86/bool-zext.ll b/test/CodeGen/X86/bool-zext.ll
index 3558376..c98ad9e 100644
--- a/test/CodeGen/X86/bool-zext.ll
+++ b/test/CodeGen/X86/bool-zext.ll
@@ -10,7 +10,7 @@
define void @bar1(i1 zeroext %v1) nounwind ssp {
entry:
%conv = zext i1 %v1 to i32
- %call = tail call i32 (...)* @foo1(i32 %conv) nounwind
+ %call = tail call i32 (...) @foo1(i32 %conv) nounwind
ret void
}
@@ -23,7 +23,7 @@ entry:
define void @bar2(i8 zeroext %v1) nounwind ssp {
entry:
%conv = zext i8 %v1 to i32
- %call = tail call i32 (...)* @foo1(i32 %conv) nounwind
+ %call = tail call i32 (...) @foo1(i32 %conv) nounwind
ret void
}
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
index 3ebe1a1..f4db3ba 100644
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@@ -17,11 +17,11 @@ entry:
br i1 %4, label %bb1, label %bb
bb: ; preds = %entry
- %5 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=1]
+ %5 = tail call i32 (...) @foo() nounwind ; <i32> [#uses=1]
ret i32 %5
bb1: ; preds = %entry
- %6 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1]
+ %6 = tail call i32 (...) @bar() nounwind ; <i32> [#uses=1]
ret i32 %6
}
diff --git a/test/CodeGen/X86/byval-align.ll b/test/CodeGen/X86/byval-align.ll
index ac0ab75..8366ae3 100644
--- a/test/CodeGen/X86/byval-align.ll
+++ b/test/CodeGen/X86/byval-align.ll
@@ -18,7 +18,7 @@ entry:
%1 = ptrtoint i8* %0 to i64 ; <i64> [#uses=1]
store i64 %1, i64* %p, align 8
%2 = load i8*, i8** %ptr, align 8 ; <i8*> [#uses=1]
- %3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i64 0, i64 0), i8* %2) nounwind ; <i32> [#uses=0]
+ %3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i64 0, i64 0), i8* %2) nounwind ; <i32> [#uses=0]
%4 = load i64, i64* %p, align 8 ; <i64> [#uses=1]
%5 = and i64 %4, 140737488355264 ; <i64> [#uses=1]
%6 = load i64, i64* %p, align 8 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll
index 2d39901..c3e7b7e 100644
--- a/test/CodeGen/X86/byval6.ll
+++ b/test/CodeGen/X86/byval6.ll
@@ -6,8 +6,8 @@
define i32 @main() nounwind {
entry:
- tail call void (i32, ...)* @bar( i32 3, %struct.W* byval @.cpx ) nounwind
- tail call void (i32, ...)* @baz( i32 3, %struct.W* byval @B ) nounwind
+ tail call void (i32, ...) @bar( i32 3, %struct.W* byval @.cpx ) nounwind
+ tail call void (i32, ...) @baz( i32 3, %struct.W* byval @B ) nounwind
ret i32 undef
}
diff --git a/test/CodeGen/X86/cache-intrinsic.ll b/test/CodeGen/X86/cache-intrinsic.ll
index c023047..0b9d77a 100644
--- a/test/CodeGen/X86/cache-intrinsic.ll
+++ b/test/CodeGen/X86/cache-intrinsic.ll
@@ -10,10 +10,10 @@ define i32 @main() {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
%call1 = call i8* @strcpy(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str1, i32 0, i32 0)) #3
call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds (i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i32 32)) #3
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
ret i32 0
}
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
index a885183..f2f36b1 100644
--- a/test/CodeGen/X86/cmov.ll
+++ b/test/CodeGen/X86/cmov.ll
@@ -108,7 +108,7 @@ func_1.exit: ; preds = %bb.i.i, %func_4.exi
%g_96.tmp.0.i = phi i8 [ %g_96.promoted.i, %bb.i.i ], [ %.mux.i, %func_4.exit.i ] ; <i8> [#uses=2]
store i8 %g_96.tmp.0.i, i8* @g_96
%6 = zext i8 %g_96.tmp.0.i to i32 ; <i32> [#uses=1]
- %7 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([15 x i8], [15 x i8]* @_2E_str, i64 0, i64 0), i32 %6) nounwind ; <i32> [#uses=0]
+ %7 = tail call i32 (i8*, ...) @printf(i8* noalias getelementptr ([15 x i8], [15 x i8]* @_2E_str, i64 0, i64 0), i32 %6) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
index 818138a..584179a 100644
--- a/test/CodeGen/X86/cmp.ll
+++ b/test/CodeGen/X86/cmp.ll
@@ -75,7 +75,7 @@ define i32 @test5(double %A) nounwind {
br i1 %bothcond, label %bb8, label %bb12
bb8:; preds = %entry
- %tmp9 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1]
+ %tmp9 = tail call i32 (...) @foo( ) nounwind ; <i32> [#uses=1]
ret i32 %tmp9
bb12:; preds = %entry
diff --git a/test/CodeGen/X86/coalescer-remat.ll b/test/CodeGen/X86/coalescer-remat.ll
index 13fb46b..62e0562 100644
--- a/test/CodeGen/X86/coalescer-remat.ll
+++ b/test/CodeGen/X86/coalescer-remat.ll
@@ -7,7 +7,7 @@ define i32 @main() nounwind {
entry:
%t0 = cmpxchg i64* @val, i64 0, i64 1 monotonic monotonic
%0 = extractvalue { i64, i1 } %t0, 0
- %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8], [7 x i8]* @"\01LC", i32 0, i64 0), i64 %0) nounwind
+ %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([7 x i8], [7 x i8]* @"\01LC", i32 0, i64 0), i64 %0) nounwind
ret i32 0
}
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 5fe5dc5..a95b84d 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -439,7 +439,7 @@ entry:
%conv = uitofp i64 %sub to float
%div = fmul float %conv, 5.000000e-01
%conv2 = fpext float %div to double
- tail call void (...)* @_Z6PrintFz(i8* getelementptr inbounds ({ [1 x i8], [63 x i8] }, { [1 x i8], [63 x i8] }* @.str, i64 0, i32 0, i64 0), double %conv2)
+ tail call void (...) @_Z6PrintFz(i8* getelementptr inbounds ({ [1 x i8], [63 x i8] }, { [1 x i8], [63 x i8] }* @.str, i64 0, i32 0, i64 0), double %conv2)
ret void
}
declare void @_Z6PrintFz(...)
@@ -462,7 +462,7 @@ for.cond: ; preds = %for.inc, %entry
%cmp = icmp eq i32* undef, %3
%conv2 = zext i1 %cmp to i32
%and = and i32 %conv2, %0
- tail call void (...)* @fn3(i32 %and) nounwind
+ tail call void (...) @fn3(i32 %and) nounwind
%tobool = icmp eq i32 undef, 0
br i1 %tobool, label %for.inc, label %if.then
diff --git a/test/CodeGen/X86/dag-optnone.ll b/test/CodeGen/X86/dag-optnone.ll
new file mode 100644
index 0000000..f7774e6
--- /dev/null
+++ b/test/CodeGen/X86/dag-optnone.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -O0 -mattr=+avx | FileCheck %s
+
+; Background:
+; If fast-isel bails out to normal selection, then the DAG combiner will run,
+; even at -O0. In principle this should not happen (those are optimizations,
+; and we said -O0) but as a practical matter there are some instruction
+; selection patterns that depend on the legalizations and transforms that the
+; DAG combiner does.
+;
+; The 'optnone' attribute implicitly sets -O0 and fast-isel for the function.
+; The DAG combiner was disabled for 'optnone' (but not -O0) by r221168, then
+; re-enabled in r233153 because of problems with instruction selection patterns
+; mentioned above. (Note: because 'optnone' is supposed to match -O0, r221168
+; really should have disabled the combiner for both.)
+;
+; If instruction selection eventually becomes smart enough to run without DAG
+; combiner, then the combiner can be turned off for -O0 (not just 'optnone')
+; and this test can go away. (To be replaced by a different test that verifies
+; the DAG combiner does *not* run at -O0 or for 'optnone' functions.)
+;
+; In the meantime, this test wants to make sure the combiner stays enabled for
+; 'optnone' functions, just as it is for -O0.
+
+
+; The test cases @foo[WithOptnone] prove that the same DAG combine happens
+; with -O0 and with 'optnone' set. To prove this, we use a Windows triple to
+; cause fast-isel to bail out (because something about the calling convention
+; is not handled in fast-isel). Then we have a repeated fadd that can be
+; combined into an fmul. We show that this happens in both the non-optnone
+; function and the optnone function.
+
+define float @foo(float %x) #0 {
+entry:
+ %add = fadd fast float %x, %x
+ %add1 = fadd fast float %add, %x
+ ret float %add1
+}
+
+; CHECK-LABEL: @foo
+; CHECK-NOT: add
+; CHECK: mul
+; CHECK-NEXT: ret
+
+define float @fooWithOptnone(float %x) #1 {
+entry:
+ %add = fadd fast float %x, %x
+ %add1 = fadd fast float %add, %x
+ ret float %add1
+}
+
+; CHECK-LABEL: @fooWithOptnone
+; CHECK-NOT: add
+; CHECK: mul
+; CHECK-NEXT: ret
+
+
+; The test case @bar is derived from an instruction selection failure case
+; that was solved by r233153. It depends on -mattr=+avx.
+; Really all we're trying to prove is that it doesn't crash any more.
+
+@id84 = common global <16 x i32> zeroinitializer, align 64
+
+define void @bar() #1 {
+entry:
+ %id83 = alloca <16 x i8>, align 16
+ %0 = load <16 x i32>, <16 x i32>* @id84, align 64
+ %conv = trunc <16 x i32> %0 to <16 x i8>
+ store <16 x i8> %conv, <16 x i8>* %id83, align 16
+ ret void
+}
+
+attributes #0 = { "unsafe-fp-math"="true" }
+attributes #1 = { noinline optnone "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/X86/dagcombine-and-setcc.ll b/test/CodeGen/X86/dagcombine-and-setcc.ll
index bb2bfbe..57adc8b 100644
--- a/test/CodeGen/X86/dagcombine-and-setcc.ll
+++ b/test/CodeGen/X86/dagcombine-and-setcc.ll
@@ -39,7 +39,7 @@ ret2:
define i32 @main(i32 %argc, i8** nocapture readnone %argv) {
%res = alloca i32, align 4
%t = call i32 @foo(i32 1, i32 2, i32* %res) #3
- %v = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %t)
+ %v = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %t)
ret i32 0
}
diff --git a/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll b/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
index fe502bb..c5085a2 100644
--- a/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
+++ b/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
@@ -169,36 +169,36 @@ attributes #2 = { nounwind readnone }
!53 = distinct !MDLexicalBlock(line: 14, column: 0, file: !1, scope: !51)
!54 = !MDLocation(line: 16, scope: !53)
!55 = !MDLocation(line: 17, scope: !24)
-!56 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38, inlinedAt: !55)
+!56 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38)
!57 = !MDLocation(line: 0, scope: !40, inlinedAt: !55)
!58 = !{i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0)}
-!59 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15, inlinedAt: !55)
+!59 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15)
!60 = !MDLocation(line: 5, scope: !40, inlinedAt: !55)
!61 = !MDLocation(line: 5, scope: !62, inlinedAt: !55)
!62 = distinct !MDLexicalBlock(line: 5, column: 0, file: !1, scope: !40)
!63 = !MDLocation(line: 18, scope: !24)
-!64 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38, inlinedAt: !63)
+!64 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38)
!65 = !MDLocation(line: 0, scope: !40, inlinedAt: !63)
-!66 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15, inlinedAt: !63)
+!66 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15)
!67 = !MDLocation(line: 5, scope: !40, inlinedAt: !63)
!68 = !MDLocation(line: 5, scope: !62, inlinedAt: !63)
!69 = !MDLocation(line: 20, scope: !70)
!70 = distinct !MDLexicalBlock(line: 20, column: 0, file: !1, scope: !24)
-!71 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38, inlinedAt: !72)
+!71 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
!72 = !MDLocation(line: 21, scope: !70)
!73 = !MDLocation(line: 0, scope: !35, inlinedAt: !72)
!74 = !{i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i64 0, i64 0)}
-!75 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15, inlinedAt: !72)
+!75 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
!76 = !MDLocation(line: 6, scope: !35, inlinedAt: !72)
-!77 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38, inlinedAt: !78)
+!77 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
!78 = !MDLocation(line: 23, scope: !70)
!79 = !MDLocation(line: 0, scope: !35, inlinedAt: !78)
!80 = !{i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0)}
-!81 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15, inlinedAt: !78)
+!81 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
!82 = !MDLocation(line: 6, scope: !35, inlinedAt: !78)
-!83 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38, inlinedAt: !84)
+!83 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
!84 = !MDLocation(line: 24, scope: !24)
!85 = !MDLocation(line: 0, scope: !35, inlinedAt: !84)
-!86 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15, inlinedAt: !84)
+!86 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
!87 = !MDLocation(line: 6, scope: !35, inlinedAt: !84)
!88 = !MDLocation(line: 25, scope: !24)
diff --git a/test/CodeGen/X86/dbg-changes-codegen.ll b/test/CodeGen/X86/dbg-changes-codegen.ll
index 6cdfdc2..8f95338 100644
--- a/test/CodeGen/X86/dbg-changes-codegen.ll
+++ b/test/CodeGen/X86/dbg-changes-codegen.ll
@@ -44,7 +44,7 @@
define zeroext i1 @_ZN3Foo3batEv(%struct.Foo* %this) #0 align 2 {
entry:
%0 = load %struct.Foo*, %struct.Foo** @pfoo, align 8
- tail call void @llvm.dbg.value(metadata %struct.Foo* %0, i64 0, metadata !62, metadata !MDExpression())
+ tail call void @llvm.dbg.value(metadata %struct.Foo* %0, i64 0, metadata !62, metadata !MDExpression()), !dbg !MDLocation(scope: !MDSubprogram())
%cmp.i = icmp eq %struct.Foo* %0, %this
ret i1 %cmp.i
}
@@ -53,7 +53,7 @@ entry:
define void @_Z3bazv() #1 {
entry:
%0 = load %struct.Wibble*, %struct.Wibble** @wibble1, align 8
- tail call void @llvm.dbg.value(metadata %struct.Flibble* undef, i64 0, metadata !65, metadata !MDExpression())
+ tail call void @llvm.dbg.value(metadata %struct.Flibble* undef, i64 0, metadata !65, metadata !MDExpression()), !dbg !MDLocation(scope: !MDSubprogram())
%1 = load %struct.Wibble*, %struct.Wibble** @wibble2, align 8
%cmp.i = icmp ugt %struct.Wibble* %1, %0
br i1 %cmp.i, label %if.then.i, label %_ZN7Flibble3barEP6Wibble.exit
@@ -78,6 +78,6 @@ attributes #2 = { nounwind readnone }
!17 = !MDDerivedType(tag: DW_TAG_reference_type, baseType: null)
!45 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
-!62 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "arg", line: 4, arg: 2, scope: null, type: !17)
+!62 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "arg", line: 4, arg: 2, scope: !MDSubprogram(), type: !17)
!64 = !{%struct.Flibble* undef}
-!65 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 13, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: null, type: !45)
+!65 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 13, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !MDSubprogram(), type: !45)
diff --git a/test/CodeGen/X86/discontiguous-loops.ll b/test/CodeGen/X86/discontiguous-loops.ll
index fa7692b..20db750 100644
--- a/test/CodeGen/X86/discontiguous-loops.ll
+++ b/test/CodeGen/X86/discontiguous-loops.ll
@@ -40,7 +40,7 @@ ybb8: ; preds = %ybb1
bb10: ; preds = %ybb8
%tmp11 = load i8*, i8** undef, align 8 ; <i8*> [#uses=1]
- call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str96, i64 0, i64 0), i8* %tmp11) nounwind
+ call void (i8*, ...) @fatal(i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str96, i64 0, i64 0), i8* %tmp11) nounwind
unreachable
ybb12: ; preds = %ybb8
@@ -51,7 +51,7 @@ ybb13: ; preds = %ybb12
br i1 %tmp14, label %bb16, label %ybb1
bb15: ; preds = %ybb12
- call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str96, i64 0, i64 0), i8* undef) nounwind
+ call void (i8*, ...) @fatal(i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str96, i64 0, i64 0), i8* undef) nounwind
unreachable
bb16: ; preds = %ybb13
diff --git a/test/CodeGen/X86/dllimport-x86_64.ll b/test/CodeGen/X86/dllimport-x86_64.ll
index af15a86..7ee6b43 100644
--- a/test/CodeGen/X86/dllimport-x86_64.ll
+++ b/test/CodeGen/X86/dllimport-x86_64.ll
@@ -36,13 +36,13 @@ define void @use() nounwind {
; OPT-NOT: call void @inline1()
; OPT-NOT: call void @inline2()
; OPT-NOT: load i32, i32* @Var2
-; OPT: call void (...)* @dummy(i32 %1, i32 1)
+; OPT: call void (...) @dummy(i32 %1, i32 1)
; CHECK-DAG: movq __imp_Var1(%rip), [[R1:%[a-z]{3}]]
; CHECK-DAG: movq __imp_Var2(%rip), [[R2:%[a-z]{3}]]
%1 = load i32, i32* @Var1
%2 = load i32, i32* @Var2
- call void(...)* @dummy(i32 %1, i32 %2)
+ call void(...) @dummy(i32 %1, i32 %2)
ret void
}
diff --git a/test/CodeGen/X86/dllimport.ll b/test/CodeGen/X86/dllimport.ll
index eb9484c..9db654f 100644
--- a/test/CodeGen/X86/dllimport.ll
+++ b/test/CodeGen/X86/dllimport.ll
@@ -47,13 +47,13 @@ define void @use() nounwind {
; OPT-NOT: call void @inline1()
; OPT-NOT: call void @inline2()
; OPT-NOT: load i32, i32* @Var2
-; OPT: call void (...)* @dummy(i32 %1, i32 1)
+; OPT: call void (...) @dummy(i32 %1, i32 1)
; CHECK-DAG: movl __imp__Var1, [[R1:%[a-z]{3}]]
; CHECK-DAG: movl __imp__Var2, [[R2:%[a-z]{3}]]
%1 = load i32, i32* @Var1
%2 = load i32, i32* @Var2
- call void(...)* @dummy(i32 %1, i32 %2)
+ call void(...) @dummy(i32 %1, i32 %2)
ret void
}
diff --git a/test/CodeGen/X86/early-ifcvt.ll b/test/CodeGen/X86/early-ifcvt.ll
index 6215519..7fcd530 100644
--- a/test/CodeGen/X86/early-ifcvt.ll
+++ b/test/CodeGen/X86/early-ifcvt.ll
@@ -62,7 +62,7 @@ if.then37:
if.end41:
%exit_status.0 = phi i32 [ 2, %if.then29 ], [ 0, %if.then37 ], [ 66, %entry ]
- call void (...)* @fprintf(i32 %exit_status.0) nounwind
+ call void (...) @fprintf(i32 %exit_status.0) nounwind
unreachable
}
diff --git a/test/CodeGen/X86/exedeps-movq.ll b/test/CodeGen/X86/exedeps-movq.ll
new file mode 100644
index 0000000..b702c87
--- /dev/null
+++ b/test/CodeGen/X86/exedeps-movq.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX
+
+; Verify that we select the correct version of the instruction that stores the low 64-bits
+; of a 128-bit vector. We want to avoid int/fp domain crossing penalties, so ignore the
+; bitcast ops and choose:
+;
+; movlps for floats
+; movlpd for doubles
+; movq for integers
+
+define void @store_floats(<4 x float> %x, i64* %p) {
+; SSE-LABEL: store_floats:
+; SSE: # BB#0:
+; SSE-NEXT: addps %xmm0, %xmm0
+; SSE-NEXT: movlps %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_floats:
+; AVX: # BB#0:
+; AVX-NEXT: vaddps %xmm0, %xmm0, %xmm0
+
+
+; !!! FIXME - the AVX version is not handled correctly.
+; AVX-NEXT: vmovq %xmm0, (%rdi)
+
+
+; AVX-NEXT: retq
+ %a = fadd <4 x float> %x, %x
+ %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %c = bitcast <2 x float> %b to i64
+ store i64 %c, i64* %p
+ ret void
+}
+
+define void @store_double(<2 x double> %x, i64* %p) {
+; SSE-LABEL: store_double:
+; SSE: # BB#0:
+; SSE-NEXT: addpd %xmm0, %xmm0
+; SSE-NEXT: movlpd %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_double:
+; AVX: # BB#0:
+; AVX-NEXT: vaddpd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovlpd %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %a = fadd <2 x double> %x, %x
+ %b = extractelement <2 x double> %a, i32 0
+ %c = bitcast double %b to i64
+ store i64 %c, i64* %p
+ ret void
+}
+
+define void @store_int(<4 x i32> %x, <2 x float>* %p) {
+; SSE-LABEL: store_int:
+; SSE: # BB#0:
+; SSE-NEXT: paddd %xmm0, %xmm0
+; SSE-NEXT: movq %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_int:
+; AVX: # BB#0:
+; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovq %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %a = add <4 x i32> %x, %x
+ %b = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %c = bitcast <2 x i32> %b to <2 x float>
+ store <2 x float> %c, <2 x float>* %p
+ ret void
+}
+
diff --git a/test/CodeGen/X86/extern_weak.ll b/test/CodeGen/X86/extern_weak.ll
index 01e32aa..c2ff09f 100644
--- a/test/CodeGen/X86/extern_weak.ll
+++ b/test/CodeGen/X86/extern_weak.ll
@@ -5,7 +5,7 @@
declare extern_weak i32 @X(i8*)
define void @bar() {
- tail call void (...)* @foo( )
+ tail call void (...) @foo( )
ret void
}
diff --git a/test/CodeGen/X86/fast-isel-i1.ll b/test/CodeGen/X86/fast-isel-i1.ll
index d72a31c..589de76 100644
--- a/test/CodeGen/X86/fast-isel-i1.ll
+++ b/test/CodeGen/X86/fast-isel-i1.ll
@@ -23,14 +23,15 @@ exit: ; preds = %next
define void @test2(i8* %a) nounwind {
entry:
+; clang uses i8 constants for booleans, so we test with an i8 1.
; CHECK-LABEL: test2:
; CHECK: movb {{.*}} %al
; CHECK-NEXT: xorb $1, %al
; CHECK-NEXT: testb $1
%tmp = load i8, i8* %a, align 1
- %tobool = trunc i8 %tmp to i1
- %tobool2 = xor i1 %tobool, true
- br i1 %tobool2, label %if.then, label %if.end
+ %xor = xor i8 %tmp, 1
+ %tobool = trunc i8 %xor to i1
+ br i1 %tobool, label %if.then, label %if.end
if.then:
call void @test2(i8* null)
diff --git a/test/CodeGen/X86/fast-isel-sext.ll b/test/CodeGen/X86/fast-isel-sext.ll
new file mode 100644
index 0000000..ca1558e
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-sext.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=x86_64-linux -fast-isel -show-mc-encoding < %s | FileCheck %s
+
+; CHECK-LABEL: f:
+; CHECK: addl $-2, %eax # encoding: [0x83,0xc0,0xfe]
+define i32 @f(i32* %y) {
+ %x = load i32, i32* %y
+ %dec = add i32 %x, -2
+ ret i32 %dec
+}
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index d4bbb63..d748cba 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -190,7 +190,7 @@ define void @test16() nounwind {
; CHECK: movl $1, %edi
; CHECK: movb $0, %al
; CHECK: callq _test16callee
- call void (...)* @test16callee(i32 1)
+ call void (...) @test16callee(i32 1)
br label %block2
block2:
@@ -201,7 +201,7 @@ block2:
; AVX: vmovsd LCP{{.*}}_{{.*}}(%rip), %xmm0
; AVX: movb $1, %al
; AVX: callq _test16callee
- call void (...)* @test16callee(double 1.000000e+00)
+ call void (...) @test16callee(double 1.000000e+00)
ret void
}
diff --git a/test/CodeGen/X86/fdiv-combine.ll b/test/CodeGen/X86/fdiv-combine.ll
new file mode 100644
index 0000000..279bb06
--- /dev/null
+++ b/test/CodeGen/X86/fdiv-combine.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; Anything more than one division using a single divisor operand
+; should be converted into a reciprocal and multiplication.
+
+define float @div1_arcp(float %x, float %y, float %z) #0 {
+; CHECK-LABEL: div1_arcp:
+; CHECK: # BB#0:
+; CHECK-NEXT: divss %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %div1 = fdiv arcp float %x, %y
+ ret float %div1
+}
+
+define float @div2_arcp(float %x, float %y, float %z) #0 {
+; CHECK-LABEL: div2_arcp:
+; CHECK: # BB#0:
+; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: divss %xmm2, %xmm3
+; CHECK-NEXT: mulss %xmm3, %xmm0
+; CHECK-NEXT: mulss %xmm1, %xmm0
+; CHECK-NEXT: mulss %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %div1 = fdiv arcp float %x, %z
+ %mul = fmul arcp float %div1, %y
+ %div2 = fdiv arcp float %mul, %z
+ ret float %div2
+}
+
+; FIXME: If the backend understands 'arcp', then this attribute is unnecessary.
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/X86/fltused.ll b/test/CodeGen/X86/fltused.ll
index dcc1382..6c5d8ce 100644
--- a/test/CodeGen/X86/fltused.ll
+++ b/test/CodeGen/X86/fltused.ll
@@ -11,7 +11,7 @@
define i32 @main() nounwind {
entry:
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
ret i32 0
}
diff --git a/test/CodeGen/X86/fltused_function_pointer.ll b/test/CodeGen/X86/fltused_function_pointer.ll
index ba5879a..a41ae48 100644
--- a/test/CodeGen/X86/fltused_function_pointer.ll
+++ b/test/CodeGen/X86/fltused_function_pointer.ll
@@ -11,7 +11,7 @@
define i32 @foo(i32 (i8*, ...)* %f) nounwind {
entry:
- %call = tail call i32 (i8*, ...)* %f(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+ %call = tail call i32 (i8*, ...) %f(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
ret i32 0
}
diff --git a/test/CodeGen/X86/fp-stack-O0.ll b/test/CodeGen/X86/fp-stack-O0.ll
index df90254..79ef28b 100644
--- a/test/CodeGen/X86/fp-stack-O0.ll
+++ b/test/CodeGen/X86/fp-stack-O0.ll
@@ -17,7 +17,7 @@ declare i32 @x2(x86_fp80, x86_fp80) nounwind
; CHECK-NEXT: x2
define i32 @test1() nounwind uwtable ssp {
entry:
- %call = call x86_fp80 (...)* bitcast (x86_fp80 (i32)* @x1 to x86_fp80 (...)*)(i32 -1)
+ %call = call x86_fp80 (...) bitcast (x86_fp80 (i32)* @x1 to x86_fp80 (...)*)(i32 -1)
%call1 = call i32 @x2(x86_fp80 %call, x86_fp80 0xK401EFFFFFFFF00000000)
ret i32 %call1
}
diff --git a/test/CodeGen/X86/fp-stack-ret-store.ll b/test/CodeGen/X86/fp-stack-ret-store.ll
index 05dfc54..c7cbb2a 100644
--- a/test/CodeGen/X86/fp-stack-ret-store.ll
+++ b/test/CodeGen/X86/fp-stack-ret-store.ll
@@ -7,7 +7,7 @@ target triple = "i686-apple-darwin8"
define void @bar(double* %P) {
entry:
- %tmp = tail call double (...)* @foo( ) ; <double> [#uses=1]
+ %tmp = tail call double (...) @foo( ) ; <double> [#uses=1]
store double %tmp, double* %P, align 8
ret void
}
@@ -16,7 +16,7 @@ declare double @foo(...)
define void @bar2(float* %P) {
entry:
- %tmp = tail call double (...)* @foo2( ) ; <double> [#uses=1]
+ %tmp = tail call double (...) @foo2( ) ; <double> [#uses=1]
%tmp1 = fptrunc double %tmp to float ; <float> [#uses=1]
store float %tmp1, float* %P, align 4
ret void
diff --git a/test/CodeGen/X86/fpstack-debuginstr-kill.ll b/test/CodeGen/X86/fpstack-debuginstr-kill.ll
index 56c8c27..8eb452a 100644
--- a/test/CodeGen/X86/fpstack-debuginstr-kill.ll
+++ b/test/CodeGen/X86/fpstack-debuginstr-kill.ll
@@ -32,7 +32,7 @@ sw.bb735: ; preds = %if.end511
unreachable
if.end41.i2210: ; preds = %if.end511
- call void @llvm.dbg.value(metadata x86_fp80 %src.sroa.0.0.src.sroa.0.0.2280, i64 0, metadata !20, metadata !MDExpression())
+ call void @llvm.dbg.value(metadata x86_fp80 %src.sroa.0.0.src.sroa.0.0.2280, i64 0, metadata !20, metadata !MDExpression()), !dbg !MDLocation(scope: !4)
unreachable
sw.bb992: ; preds = %if.end511
diff --git a/test/CodeGen/X86/frameescape.ll b/test/CodeGen/X86/frameescape.ll
index 40eeb0e..3a624ae 100644
--- a/test/CodeGen/X86/frameescape.ll
+++ b/test/CodeGen/X86/frameescape.ll
@@ -12,11 +12,11 @@ define void @print_framealloc_from_fp(i8* %fp) {
%a.i8 = call i8* @llvm.framerecover(i8* bitcast (void()* @alloc_func to i8*), i8* %fp, i32 0)
%a = bitcast i8* %a.i8 to i32*
%a.val = load i32, i32* %a
- call i32 (i8*, ...)* @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %a.val)
+ call i32 (i8*, ...) @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %a.val)
%b.i8 = call i8* @llvm.framerecover(i8* bitcast (void()* @alloc_func to i8*), i8* %fp, i32 1)
%b = bitcast i8* %b.i8 to i32*
%b.val = load i32, i32* %b
- call i32 (i8*, ...)* @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %b.val)
+ call i32 (i8*, ...) @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %b.val)
store i32 42, i32* %b
ret void
}
@@ -53,7 +53,7 @@ define void @print_framealloc_from_fp(i8* %fp) {
define void @alloc_func() {
%a = alloca i32
%b = alloca i32
- call void (...)* @llvm.frameescape(i32* %a, i32* %b)
+ call void (...) @llvm.frameescape(i32* %a, i32* %b)
store i32 42, i32* %a
store i32 13, i32* %b
%fp = call i8* @llvm.frameaddress(i32 0)
@@ -97,7 +97,7 @@ define i32 @main() {
define void @alloc_func_no_frameaddr() {
%a = alloca i32
%b = alloca i32
- call void (...)* @llvm.frameescape(i32* %a, i32* %b)
+ call void (...) @llvm.frameescape(i32* %a, i32* %b)
store i32 42, i32* %a
store i32 13, i32* %b
call void @print_framealloc_from_fp(i8* null)
diff --git a/test/CodeGen/X86/h-registers-3.ll b/test/CodeGen/X86/h-registers-3.ll
index 8a0b07b..29d0c28 100644
--- a/test/CodeGen/X86/h-registers-3.ll
+++ b/test/CodeGen/X86/h-registers-3.ll
@@ -3,7 +3,7 @@
define zeroext i8 @foo() nounwind ssp {
entry:
- %0 = tail call zeroext i16 (...)* @bar() nounwind
+ %0 = tail call zeroext i16 (...) @bar() nounwind
%1 = lshr i16 %0, 8
%2 = trunc i16 %1 to i8
ret i8 %2
diff --git a/test/CodeGen/X86/hoist-common.ll b/test/CodeGen/X86/hoist-common.ll
index 01d1b8c..65f8340 100644
--- a/test/CodeGen/X86/hoist-common.ll
+++ b/test/CodeGen/X86/hoist-common.ll
@@ -26,7 +26,7 @@ entry:
if.then:
; CHECK: callq
- %call = tail call zeroext i1 (...)* @foo() nounwind
+ %call = tail call zeroext i1 (...) @foo() nounwind
br label %return
return:
diff --git a/test/CodeGen/X86/inline-asm-duplicated-constraint.ll b/test/CodeGen/X86/inline-asm-duplicated-constraint.ll
new file mode 100644
index 0000000..2ef5474
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-duplicated-constraint.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 -no-integrated-as -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; CHECK-LABEL: test1:
+; CHECK: movl (%rdi), %eax
+; CHECK: nop
+; CHECK: movl %eax, (%rdi)
+; CHECK: ret
+define void @test1(i32* %l) {
+ %load = load i32, i32* %l
+ call void asm "nop", "=*rmrm,0m0m,~{dirflag},~{fpsr},~{flags}"(i32* %l, i32 %load)
+ ret void
+}
diff --git a/test/CodeGen/X86/invalid-shift-immediate.ll b/test/CodeGen/X86/invalid-shift-immediate.ll
index 21ad6e8..1fb80c7 100644
--- a/test/CodeGen/X86/invalid-shift-immediate.ll
+++ b/test/CodeGen/X86/invalid-shift-immediate.ll
@@ -17,7 +17,7 @@ entry:
br i1 %toBool, label %bb, label %bb5
bb: ; preds = %entry
- %tmp4 = call i32 (...)* @bar( ) nounwind ; <i32> [#uses=0]
+ %tmp4 = call i32 (...) @bar( ) nounwind ; <i32> [#uses=0]
br label %bb5
bb5: ; preds = %bb, %entry
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index 31a7af3..ca3e8bf 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -9,11 +9,11 @@ entry:
br i1 %tmp, label %cond_true, label %cond_next
cond_true: ; preds = %entry
- %tmp2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp2 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
br label %cond_next
cond_next: ; preds = %cond_true, %entry
- %tmp3 = tail call i32 (...)* @baz( ) ; <i32> [#uses=0]
+ %tmp3 = tail call i32 (...) @baz( ) ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll
index 4ec2b52..42e6d12 100644
--- a/test/CodeGen/X86/licm-nested.ll
+++ b/test/CodeGen/X86/licm-nested.ll
@@ -81,7 +81,7 @@ for.inc35: ; preds = %for.body15, %for.en
while.end: ; preds = %while.cond.loopexit, %while.cond.preheader
%count.0.lcssa = phi i32 [ 0, %while.cond.preheader ], [ %count.1, %while.cond.loopexit ] ; <i32> [#uses=1]
- %call40 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i32 %count.0.lcssa) nounwind ; <i32> [#uses=0]
+ %call40 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i32 %count.0.lcssa) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/X86/licm-regpressure.ll b/test/CodeGen/X86/licm-regpressure.ll
new file mode 100644
index 0000000..0ab6554
--- /dev/null
+++ b/test/CodeGen/X86/licm-regpressure.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; This test currently fails as MachineLICM does not compute register pressure
+; correctly. More details: llvm.org/PR23143
+; XFAIL: *
+
+; MachineLICM should take register pressure into account.
+; CHECK-NOT: Spill
+
+%struct.A = type { i32, i32, i32, i32, i32, i32, i32 }
+
+define void @test(i1 %b, %struct.A* %a) nounwind {
+entry:
+ br label %loop-header
+
+loop-header:
+ br label %loop-body
+
+loop-body:
+ %0 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
+ %1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1
+ %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 2
+ %3 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 3
+ %4 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 4
+ %5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 5
+ %6 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 6
+ call void @assign(i32* %0)
+ call void @assign(i32* %1)
+ call void @assign(i32* %2)
+ call void @assign(i32* %3)
+ call void @assign(i32* %4)
+ call void @assign(i32* %5)
+ call void @assign(i32* %6)
+ br i1 %b, label %loop-body, label %loop-exit
+
+loop-exit:
+ ret void
+}
+
+declare void @assign(i32*)
diff --git a/test/CodeGen/X86/licm-symbol.ll b/test/CodeGen/X86/licm-symbol.ll
index 854ea0b..0f115dd 100644
--- a/test/CodeGen/X86/licm-symbol.ll
+++ b/test/CodeGen/X86/licm-symbol.ll
@@ -29,11 +29,11 @@ bb151: ; preds = %bb59, %bb56, %bb14
br i1 undef, label %bb56, label %bb59
bb56: ; preds = %bb151
- %t0 = call i32 (%struct.FILE*)* @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE], [0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind
+ %t0 = call i32 (%struct.FILE*) @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE], [0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind
br label %bb151
bb59: ; preds = %bb151
- %t1 = call i32 (%struct.FILE*)* @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE], [0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind
+ %t1 = call i32 (%struct.FILE*) @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE], [0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind
br label %bb151
}
diff --git a/test/CodeGen/X86/lsr-normalization.ll b/test/CodeGen/X86/lsr-normalization.ll
index e75f5b2..09c892c 100644
--- a/test/CodeGen/X86/lsr-normalization.ll
+++ b/test/CodeGen/X86/lsr-normalization.ll
@@ -71,7 +71,7 @@ bb25: ; preds = %bb25, %bb23
bb32: ; preds = %bb25
%tmp33 = mul i64 %tmp31, %tmp24 ; <i64> [#uses=1]
- %tmp34 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @0, i64 0, i64 0), i64 %tmp33) nounwind
+ %tmp34 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @0, i64 0, i64 0), i64 %tmp33) nounwind
br label %bb35
bb35: ; preds = %bb32, %bb14
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index ce3ab4c..c6876d2 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -62,7 +62,7 @@ if.end34: ; preds = %sw.bb
; CHECK: %if.end34
; CHECK: leal
; CHECK-NOT: imull
- tail call void (...)* @printf(i32 %test_case, i32 %mul20) nounwind
+ tail call void (...) @printf(i32 %test_case, i32 %mul20) nounwind
%tmp = mul i32 %scale, %test_case
%tmp752 = mul i32 %tmp, 3
%tmp753 = zext i32 %tmp752 to i64
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index d5a3d8e..e5f1f52 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -11,7 +11,7 @@ declare i32 @memcmp(...)
define void @memcmp2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 2) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @memcmp(i8* %X, i8* %Y, i32 2) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -31,7 +31,7 @@ return: ; preds = %entry
define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -49,7 +49,7 @@ return: ; preds = %entry
define void @memcmp4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 4) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @memcmp(i8* %X, i8* %Y, i32 4) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -66,7 +66,7 @@ return: ; preds = %entry
define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -82,7 +82,7 @@ return: ; preds = %entry
define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 8) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @memcmp(i8* %X, i8* %Y, i32 8) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -99,7 +99,7 @@ return: ; preds = %entry
define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0), i32 8) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0), i32 8) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
diff --git a/test/CodeGen/X86/misched-code-difference-with-debug.ll b/test/CodeGen/X86/misched-code-difference-with-debug.ll
index 256db8b..2cc70e1 100644
--- a/test/CodeGen/X86/misched-code-difference-with-debug.ll
+++ b/test/CodeGen/X86/misched-code-difference-with-debug.ll
@@ -34,9 +34,9 @@ entry:
%c = alloca %class.C, align 1
%0 = load i8, i8* @argc, align 1
%conv = sext i8 %0 to i32
- %call = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %0, i8 signext 0, i32 %conv)
+ %call = call i32 (%class.C*, i8, i8, i8, ...) @test_function(%class.C* %c, i8 signext 0, i8 signext %0, i8 signext 0, i32 %conv)
%1 = load i8, i8* @argc, align 1
- %call2 = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %1, i8 signext 0, i32 %conv)
+ %call2 = call i32 (%class.C*, i8, i8, i8, ...) @test_function(%class.C* %c, i8 signext 0, i8 signext %1, i8 signext 0, i32 %conv)
ret void
}
@@ -47,13 +47,13 @@ define void @test_with_debug() {
entry:
%c = alloca %class.C, align 1
%0 = load i8, i8* @argc, align 1
- tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !19, metadata !29)
+ tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !19, metadata !29), !dbg !MDLocation(scope: !13)
%conv = sext i8 %0 to i32
- tail call void @llvm.dbg.value(metadata %class.C* %c, i64 0, metadata !18, metadata !29)
- %call = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %0, i8 signext 0, i32 %conv)
+ tail call void @llvm.dbg.value(metadata %class.C* %c, i64 0, metadata !18, metadata !29), !dbg !MDLocation(scope: !13)
+ %call = call i32 (%class.C*, i8, i8, i8, ...) @test_function(%class.C* %c, i8 signext 0, i8 signext %0, i8 signext 0, i32 %conv)
%1 = load i8, i8* @argc, align 1
- call void @llvm.dbg.value(metadata %class.C* %c, i64 0, metadata !18, metadata !29)
- %call2 = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %1, i8 signext 0, i32 %conv)
+ call void @llvm.dbg.value(metadata %class.C* %c, i64 0, metadata !18, metadata !29), !dbg !MDLocation(scope: !13)
+ %call2 = call i32 (%class.C*, i8, i8, i8, ...) @test_function(%class.C* %c, i8 signext 0, i8 signext %1, i8 signext 0, i32 %conv)
ret void
}
diff --git a/test/CodeGen/X86/mmx-arg-passing-x86-64.ll b/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
index 36ccfe9..2727e3e 100644
--- a/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
+++ b/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
@@ -14,7 +14,7 @@ define void @t3() nounwind {
; X86-64-NEXT: jmp _pass_v8qi ## TAILCALL
%tmp3 = load <8 x i8>, <8 x i8>* @g_v8qi, align 8
%tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
- %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
+ %tmp4 = tail call i32 (...) @pass_v8qi( x86_mmx %tmp3a ) nounwind
ret void
}
@@ -34,7 +34,7 @@ define void @t4(x86_mmx %v1, x86_mmx %v2) nounwind {
%v2b = bitcast x86_mmx %v2 to <8 x i8>
%tmp3 = add <8 x i8> %v1a, %v2b
%tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
- %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
+ %tmp4 = tail call i32 (...) @pass_v8qi( x86_mmx %tmp3a ) nounwind
ret void
}
diff --git a/test/CodeGen/X86/mmx-bitcast.ll b/test/CodeGen/X86/mmx-bitcast.ll
index 4aa10a9..00c8039 100644
--- a/test/CodeGen/X86/mmx-bitcast.ll
+++ b/test/CodeGen/X86/mmx-bitcast.ll
@@ -75,8 +75,7 @@ define i64 @t5(i32 %a, i32 %b) nounwind readnone {
; CHECK-NEXT: movd
; CHECK-NEXT: movd
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
-; CHECK-NEXT: movd %xmm0, %rax
+; CHECK-NEXT: movd %xmm1, %rax
; CHECK-NEXT: retq
%v0 = insertelement <2 x i32> undef, i32 %a, i32 0
%v1 = insertelement <2 x i32> %v0, i32 %b, i32 1
diff --git a/test/CodeGen/X86/movtopush.ll b/test/CodeGen/X86/movtopush.ll
index 4278910..f89e524 100644
--- a/test/CodeGen/X86/movtopush.ll
+++ b/test/CodeGen/X86/movtopush.ll
@@ -265,7 +265,7 @@ define void @test10() optsize {
store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
%good_ptr = load volatile void (i32, i32, i32, i32)*, void (i32, i32, i32, i32)** %stack_fptr
call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
- call void (i32, i32, i32, i32)* %good_ptr(i32 1, i32 2, i32 3, i32 4)
+ call void (i32, i32, i32, i32) %good_ptr(i32 1, i32 2, i32 3, i32 4)
ret void
}
diff --git a/test/CodeGen/X86/musttail-fastcall.ll b/test/CodeGen/X86/musttail-fastcall.ll
index ed3668d..a95e0ff 100644
--- a/test/CodeGen/X86/musttail-fastcall.ll
+++ b/test/CodeGen/X86/musttail-fastcall.ll
@@ -9,13 +9,13 @@
declare void @puts(i8*)
define i32 @call_fast_thunk() {
- %r = call x86_fastcallcc i32 (...)* @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3)
+ %r = call x86_fastcallcc i32 (...) @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3)
ret i32 %r
}
define x86_fastcallcc i32 @fast_thunk(...) {
call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))
- %r = musttail call x86_fastcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @fast_target to i32 (...)*) (...)
+ %r = musttail call x86_fastcallcc i32 (...) bitcast (i32 (i32, i32, i32)* @fast_target to i32 (...)*) (...)
ret i32 %r
}
@@ -38,13 +38,13 @@ define x86_fastcallcc i32 @fast_target(i32 inreg %a, i32 inreg %b, i32 %c) {
; Repeat the test for vectorcall, which has XMM registers.
define i32 @call_vector_thunk() {
- %r = call x86_vectorcallcc i32 (...)* @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3)
+ %r = call x86_vectorcallcc i32 (...) @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3)
ret i32 %r
}
define x86_vectorcallcc i32 @vector_thunk(...) {
call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))
- %r = musttail call x86_vectorcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @vector_target to i32 (...)*) (...)
+ %r = musttail call x86_vectorcallcc i32 (...) bitcast (i32 (i32, i32, i32)* @vector_target to i32 (...)*) (...)
ret i32 %r
}
diff --git a/test/CodeGen/X86/musttail-varargs.ll b/test/CodeGen/X86/musttail-varargs.ll
index 52115b2..3613f4c 100644
--- a/test/CodeGen/X86/musttail-varargs.ll
+++ b/test/CodeGen/X86/musttail-varargs.ll
@@ -16,8 +16,8 @@ define void @f_thunk(i8* %this, ...) {
%ap_i8 = bitcast [4 x i8*]* %ap to i8*
call void @llvm.va_start(i8* %ap_i8)
- %fptr = call void(i8*, ...)*(i8*)* @get_f(i8* %this)
- musttail call void (i8*, ...)* %fptr(i8* %this, ...)
+ %fptr = call void(i8*, ...)*(i8*) @get_f(i8* %this)
+ musttail call void (i8*, ...) %fptr(i8* %this, ...)
ret void
}
@@ -84,7 +84,7 @@ define void @f_thunk(i8* %this, ...) {
define void @g_thunk(i8* %fptr_i8, ...) {
%fptr = bitcast i8* %fptr_i8 to void (i8*, ...)*
- musttail call void (i8*, ...)* %fptr(i8* %fptr_i8, ...)
+ musttail call void (i8*, ...) %fptr(i8* %fptr_i8, ...)
ret void
}
@@ -114,7 +114,7 @@ then:
%a_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 1
%a_i8 = load i8*, i8** %a_p
%a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)*
- musttail call void (%struct.Foo*, ...)* %a(%struct.Foo* %this, ...)
+ musttail call void (%struct.Foo*, ...) %a(%struct.Foo* %this, ...)
ret void
else:
@@ -122,7 +122,7 @@ else:
%b_i8 = load i8*, i8** %b_p
%b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)*
store i32 42, i32* @g
- musttail call void (%struct.Foo*, ...)* %b(%struct.Foo* %this, ...)
+ musttail call void (%struct.Foo*, ...) %b(%struct.Foo* %this, ...)
ret void
}
diff --git a/test/CodeGen/X86/narrow-shl-cst.ll b/test/CodeGen/X86/narrow-shl-cst.ll
index 40b9760..c9e9a3d 100644
--- a/test/CodeGen/X86/narrow-shl-cst.ll
+++ b/test/CodeGen/X86/narrow-shl-cst.ll
@@ -99,3 +99,26 @@ define i64 @test11(i64 %x) nounwind {
; CHECK: xorq $-65536
; CHECK: shlq $33
}
+
+; PR23098
+define i32 @test12(i32 %x, i32* %y) nounwind {
+ %and = shl i32 %x, 1
+ %shl = and i32 %and, 255
+ store i32 %shl, i32* %y
+ ret i32 %shl
+; CHECK-LABEL: test12:
+; CHECK: andl $127
+; CHECK-NEXT: addl
+; CHECK-NOT: shl
+}
+
+define i64 @test13(i64 %x, i64* %y) nounwind {
+ %and = shl i64 %x, 1
+ %shl = and i64 %and, 255
+ store i64 %shl, i64* %y
+ ret i64 %shl
+; CHECK-LABEL: test13:
+; CHECK: andq $127
+; CHECK-NEXT: addq
+; CHECK-NOT: shl
+}
diff --git a/test/CodeGen/X86/nontemporal-2.ll b/test/CodeGen/X86/nontemporal-2.ll
index f62f372..8c08b3c 100644
--- a/test/CodeGen/X86/nontemporal-2.ll
+++ b/test/CodeGen/X86/nontemporal-2.ll
@@ -1,31 +1,303 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
; Make sure that we generate non-temporal stores for the test cases below.
+; We use xorps for zeroing, so domain information isn't available anymore.
-define void @test1(<4 x float>* %dst) {
-; CHECK-LABEL: test1:
+define void @test_zero_v4f32(<4 x float>* %dst) {
+; CHECK-LABEL: test_zero_v4f32:
; SSE: movntps
; AVX: vmovntps
store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
ret void
}
-define void @test2(<4 x i32>* %dst) {
-; CHECK-LABEL: test2:
+define void @test_zero_v4i32(<4 x i32>* %dst) {
+; CHECK-LABEL: test_zero_v4i32:
; SSE: movntps
; AVX: vmovntps
store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
ret void
}
-define void @test3(<2 x double>* %dst) {
-; CHECK-LABEL: test3:
+define void @test_zero_v2f64(<2 x double>* %dst) {
+; CHECK-LABEL: test_zero_v2f64:
; SSE: movntps
; AVX: vmovntps
store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
ret void
}
+define void @test_zero_v2i64(<2 x i64>* %dst) {
+; CHECK-LABEL: test_zero_v2i64:
+; SSE: movntps
+; AVX: vmovntps
+ store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v8i16(<8 x i16>* %dst) {
+; CHECK-LABEL: test_zero_v8i16:
+; SSE: movntps
+; AVX: vmovntps
+ store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v16i8(<16 x i8>* %dst) {
+; CHECK-LABEL: test_zero_v16i8:
+; SSE: movntps
+; AVX: vmovntps
+ store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+; And now YMM versions.
+
+define void @test_zero_v8f32(<8 x float>* %dst) {
+; CHECK-LABEL: test_zero_v8f32:
+; AVX: vmovntps %ymm
+ store <8 x float> zeroinitializer, <8 x float>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v8i32(<8 x i32>* %dst) {
+; CHECK-LABEL: test_zero_v8i32:
+; AVX2: vmovntps %ymm
+ store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v4f64(<4 x double>* %dst) {
+; CHECK-LABEL: test_zero_v4f64:
+; AVX: vmovntps %ymm
+ store <4 x double> zeroinitializer, <4 x double>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v4i64(<4 x i64>* %dst) {
+; CHECK-LABEL: test_zero_v4i64:
+; AVX2: vmovntps %ymm
+ store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v16i16(<16 x i16>* %dst) {
+; CHECK-LABEL: test_zero_v16i16:
+; AVX2: vmovntps %ymm
+ store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v32i8(<32 x i8>* %dst) {
+; CHECK-LABEL: test_zero_v32i8:
+; AVX2: vmovntps %ymm
+ store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+
+; Check that we also handle arguments. Here the type survives longer.
+
+define void @test_arg_v4f32(<4 x float> %arg, <4 x float>* %dst) {
+; CHECK-LABEL: test_arg_v4f32:
+; SSE: movntps
+; AVX: vmovntps
+ store <4 x float> %arg, <4 x float>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %dst) {
+; CHECK-LABEL: test_arg_v4i32:
+; SSE: movntps
+; AVX: vmovntps
+ store <4 x i32> %arg, <4 x i32>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v2f64(<2 x double> %arg, <2 x double>* %dst) {
+; CHECK-LABEL: test_arg_v2f64:
+; SSE: movntps
+; AVX: vmovntps
+ store <2 x double> %arg, <2 x double>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %dst) {
+; CHECK-LABEL: test_arg_v2i64:
+; SSE: movntps
+; AVX: vmovntps
+ store <2 x i64> %arg, <2 x i64>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %dst) {
+; CHECK-LABEL: test_arg_v8i16:
+; SSE: movntps
+; AVX: vmovntps
+ store <8 x i16> %arg, <8 x i16>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %dst) {
+; CHECK-LABEL: test_arg_v16i8:
+; SSE: movntps
+; AVX: vmovntps
+ store <16 x i8> %arg, <16 x i8>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+; And now YMM versions.
+
+define void @test_arg_v8f32(<8 x float> %arg, <8 x float>* %dst) {
+; CHECK-LABEL: test_arg_v8f32:
+; AVX: vmovntps %ymm
+ store <8 x float> %arg, <8 x float>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %dst) {
+; CHECK-LABEL: test_arg_v8i32:
+; AVX2: vmovntps %ymm
+ store <8 x i32> %arg, <8 x i32>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v4f64(<4 x double> %arg, <4 x double>* %dst) {
+; CHECK-LABEL: test_arg_v4f64:
+; AVX: vmovntps %ymm
+ store <4 x double> %arg, <4 x double>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %dst) {
+; CHECK-LABEL: test_arg_v4i64:
+; AVX2: vmovntps %ymm
+ store <4 x i64> %arg, <4 x i64>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %dst) {
+; CHECK-LABEL: test_arg_v16i16:
+; AVX2: vmovntps %ymm
+ store <16 x i16> %arg, <16 x i16>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %dst) {
+; CHECK-LABEL: test_arg_v32i8:
+; AVX2: vmovntps %ymm
+ store <32 x i8> %arg, <32 x i8>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+
+; Now check that if the execution domain is trivially visible, we use it.
+; We use an add to make the type survive all the way to the MOVNT.
+
+define void @test_op_v4f32(<4 x float> %a, <4 x float> %b, <4 x float>* %dst) {
+; CHECK-LABEL: test_op_v4f32:
+; SSE: movntps
+; AVX: vmovntps
+ %r = fadd <4 x float> %a, %b
+ store <4 x float> %r, <4 x float>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32>* %dst) {
+; CHECK-LABEL: test_op_v4i32:
+; SSE: movntdq
+; AVX: vmovntdq
+ %r = add <4 x i32> %a, %b
+ store <4 x i32> %r, <4 x i32>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v2f64(<2 x double> %a, <2 x double> %b, <2 x double>* %dst) {
+; CHECK-LABEL: test_op_v2f64:
+; SSE: movntpd
+; AVX: vmovntpd
+ %r = fadd <2 x double> %a, %b
+ store <2 x double> %r, <2 x double>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64>* %dst) {
+; CHECK-LABEL: test_op_v2i64:
+; SSE: movntdq
+; AVX: vmovntdq
+ %r = add <2 x i64> %a, %b
+ store <2 x i64> %r, <2 x i64>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16>* %dst) {
+; CHECK-LABEL: test_op_v8i16:
+; SSE: movntdq
+; AVX: vmovntdq
+ %r = add <8 x i16> %a, %b
+ store <8 x i16> %r, <8 x i16>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8>* %dst) {
+; CHECK-LABEL: test_op_v16i8:
+; SSE: movntdq
+; AVX: vmovntdq
+ %r = add <16 x i8> %a, %b
+ store <16 x i8> %r, <16 x i8>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+; And now YMM versions.
+
+define void @test_op_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %dst) {
+; CHECK-LABEL: test_op_v8f32:
+; AVX: vmovntps %ymm
+ %r = fadd <8 x float> %a, %b
+ store <8 x float> %r, <8 x float>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32>* %dst) {
+; CHECK-LABEL: test_op_v8i32:
+; AVX2: vmovntdq %ymm
+ %r = add <8 x i32> %a, %b
+ store <8 x i32> %r, <8 x i32>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v4f64(<4 x double> %a, <4 x double> %b, <4 x double>* %dst) {
+; CHECK-LABEL: test_op_v4f64:
+; AVX: vmovntpd %ymm
+ %r = fadd <4 x double> %a, %b
+ store <4 x double> %r, <4 x double>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %dst) {
+; CHECK-LABEL: test_op_v4i64:
+; AVX2: vmovntdq %ymm
+ %r = add <4 x i64> %a, %b
+ store <4 x i64> %r, <4 x i64>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16>* %dst) {
+; CHECK-LABEL: test_op_v16i16:
+; AVX2: vmovntdq %ymm
+ %r = add <16 x i16> %a, %b
+ store <16 x i16> %r, <16 x i16>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8>* %dst) {
+; CHECK-LABEL: test_op_v32i8:
+; AVX2: vmovntdq %ymm
+ %r = add <32 x i8> %a, %b
+ store <32 x i8> %r, <32 x i8>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
!1 = !{i32 1}
diff --git a/test/CodeGen/X86/or-branch.ll b/test/CodeGen/X86/or-branch.ll
index 9ebf890..ae3ed3f 100644
--- a/test/CodeGen/X86/or-branch.ll
+++ b/test/CodeGen/X86/or-branch.ll
@@ -2,14 +2,14 @@
define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
entry:
- %tmp = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
%tmp.upgrd.1 = icmp eq i32 %X, 0 ; <i1> [#uses=1]
%tmp3 = icmp slt i32 %Y, 5 ; <i1> [#uses=1]
%tmp4 = or i1 %tmp3, %tmp.upgrd.1 ; <i1> [#uses=1]
br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock
cond_true: ; preds = %entry
- %tmp5 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp5 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
diff --git a/test/CodeGen/X86/patchpoint-webkit_jscc.ll b/test/CodeGen/X86/patchpoint-webkit_jscc.ll
index 37bdd7d..5c39438 100644
--- a/test/CodeGen/X86/patchpoint-webkit_jscc.ll
+++ b/test/CodeGen/X86/patchpoint-webkit_jscc.ll
@@ -25,9 +25,9 @@ entry:
; FAST: movq %rax, (%rsp)
; FAST: callq
%resolveCall2 = inttoptr i64 -559038736 to i8*
- %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
+ %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
%resolveCall3 = inttoptr i64 -559038737 to i8*
- tail call webkit_jscc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 15, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
+ tail call webkit_jscc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 15, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
ret void
}
@@ -51,7 +51,7 @@ entry:
; FAST-NEXT: movabsq $-559038736, %r11
; FAST-NEXT: callq *%r11
%call = inttoptr i64 -559038736 to i8*
- %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
+ %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
ret i64 %result
}
@@ -79,7 +79,7 @@ entry:
; FAST-NEXT: movabsq $-559038736, %r11
; FAST-NEXT: callq *%r11
%call = inttoptr i64 -559038736 to i8*
- %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
+ %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
ret i64 %result
}
diff --git a/test/CodeGen/X86/patchpoint.ll b/test/CodeGen/X86/patchpoint.ll
index 24e324f..eda13fd 100644
--- a/test/CodeGen/X86/patchpoint.ll
+++ b/test/CodeGen/X86/patchpoint.ll
@@ -15,9 +15,9 @@ entry:
; CHECK: movq %[[REG]], %rax
; CHECK: ret
%resolveCall2 = inttoptr i64 -559038736 to i8*
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
%resolveCall3 = inttoptr i64 -559038737 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
ret i64 %result
}
@@ -35,7 +35,7 @@ entry:
store i64 11, i64* %metadata
store i64 12, i64* %metadata
store i64 13, i64* %metadata
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
ret void
}
@@ -48,14 +48,14 @@ entry:
%tmp80 = add i64 %tmp79, -16
%tmp81 = inttoptr i64 %tmp80 to i64*
%tmp82 = load i64, i64* %tmp81, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 5, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 5, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 15, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
%tmp83 = load i64, i64* %tmp33, align 8
%tmp84 = add i64 %tmp83, -24
%tmp85 = inttoptr i64 %tmp84 to i64*
%tmp86 = load i64, i64* %tmp85, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 5, i64 %arg, i64 %tmp10, i64 %tmp86)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 17, i32 5, i64 %arg, i64 %tmp10, i64 %tmp86)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 18, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
ret i64 10
}
@@ -67,7 +67,7 @@ entry:
; CHECK: nopl 8(%rax,%rax)
; CHECK-NEXT: popq
; CHECK-NEXT: ret
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 5, i8* null, i32 2, i64 %p1, i64 %p2)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 5, i8* null, i32 2, i64 %p1, i64 %p2)
ret void
}
@@ -78,7 +78,7 @@ entry:
; CHECK: movabsq $6153737369414576827, %r11
; CHECK-NEXT: callq *%r11
%resolveCall2 = inttoptr i64 6153737369414576827 to i8*
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall2, i32 0)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall2, i32 0)
ret i64 %result
}
diff --git a/test/CodeGen/X86/phys-reg-local-regalloc.ll b/test/CodeGen/X86/phys-reg-local-regalloc.ll
index ca364f2..a0adba0 100644
--- a/test/CodeGen/X86/phys-reg-local-regalloc.ll
+++ b/test/CodeGen/X86/phys-reg-local-regalloc.ll
@@ -51,7 +51,7 @@ entry:
%3 = call i32 asm "", "={ax}"() nounwind ; <i32> [#uses=1]
call void asm sideeffect alignstack "movl $0, $1", "{eax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %3, i32* %result) nounwind
%4 = load i32, i32* %result, align 4 ; <i32> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
store i32 0, i32* %0, align 4
%6 = load i32, i32* %0, align 4 ; <i32> [#uses=1]
store i32 %6, i32* %retval, align 4
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index faaf73b..d543deb 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -69,10 +69,10 @@ entry:
define void @test3() nounwind {
entry:
- %tmp = call void(...)*(...)* @afoo()
+ %tmp = call void(...)*(...) @afoo()
store void(...)* %tmp, void(...)** @pfoo
%tmp1 = load void(...)*, void(...)** @pfoo
- call void(...)* %tmp1()
+ call void(...) %tmp1()
ret void
; LINUX-LABEL: test3:
; LINUX: calll .L3$pb
@@ -88,7 +88,7 @@ declare void(...)* @afoo(...)
define void @test4() nounwind {
entry:
- call void(...)* @foo()
+ call void(...) @foo()
ret void
; LINUX-LABEL: test4:
; LINUX: calll .L4$pb
@@ -146,43 +146,43 @@ define void @test7(i32 %n.u) nounwind {
entry:
switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
bb:
- tail call void(...)* @foo1()
+ tail call void(...) @foo1()
ret void
bb1:
- tail call void(...)* @foo2()
+ tail call void(...) @foo2()
ret void
bb2:
- tail call void(...)* @foo6()
+ tail call void(...) @foo6()
ret void
bb3:
- tail call void(...)* @foo3()
+ tail call void(...) @foo3()
ret void
bb4:
- tail call void(...)* @foo4()
+ tail call void(...) @foo4()
ret void
bb5:
- tail call void(...)* @foo5()
+ tail call void(...) @foo5()
ret void
bb6:
- tail call void(...)* @foo1()
+ tail call void(...) @foo1()
ret void
bb7:
- tail call void(...)* @foo2()
+ tail call void(...) @foo2()
ret void
bb8:
- tail call void(...)* @foo6()
+ tail call void(...) @foo6()
ret void
bb9:
- tail call void(...)* @foo3()
+ tail call void(...) @foo3()
ret void
bb10:
- tail call void(...)* @foo4()
+ tail call void(...) @foo4()
ret void
bb11:
- tail call void(...)* @foo5()
+ tail call void(...) @foo5()
ret void
bb12:
- tail call void(...)* @foo6()
+ tail call void(...) @foo6()
ret void
; LINUX-LABEL: test7:
diff --git a/test/CodeGen/X86/pr1489.ll b/test/CodeGen/X86/pr1489.ll
index d37b9a2..13ced2a 100644
--- a/test/CodeGen/X86/pr1489.ll
+++ b/test/CodeGen/X86/pr1489.ll
@@ -48,7 +48,7 @@ entry:
%tmp1 = tail call i32 @bar( ) ; <i32> [#uses=1]
%tmp2 = tail call i32 @foo( ) ; <i32> [#uses=1]
%tmp3 = tail call i32 @quux( ) ; <i32> [#uses=1]
- %tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %tmp3, i32 %tmp2, i32 %tmp1, i32 %tmp ) ; <i32> [#uses=0]
+ %tmp5 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %tmp3, i32 %tmp2, i32 %tmp1, i32 %tmp ) ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/pr18023.ll b/test/CodeGen/X86/pr18023.ll
index ed3d6a0..c7ea20c 100644
--- a/test/CodeGen/X86/pr18023.ll
+++ b/test/CodeGen/X86/pr18023.ll
@@ -24,7 +24,7 @@ define void @func() {
%3 = load volatile i32, i32* @b, align 4
store i32 3, i32* @c, align 4
%4 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 1), align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %4)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %4)
ret void
}
diff --git a/test/CodeGen/X86/pr23246.ll b/test/CodeGen/X86/pr23246.ll
new file mode 100644
index 0000000..6eb24a6
--- /dev/null
+++ b/test/CodeGen/X86/pr23246.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple x86_64-unknown-unknown | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; PR23246
+; We're really only interested in doing something sane with the shuffle.
+
+; CHECK-LABEL: test:
+; CHECK: movq2dq %mm0, %xmm0
+; CHECK-NEXT: pshufd {{.*}} xmm0 = xmm0[0,1,0,1]
+; CHECK-NEXT: retq
+define <2 x i64> @test(x86_mmx %a) #0 {
+entry:
+ %b = bitcast x86_mmx %a to <1 x i64>
+ %s = shufflevector <1 x i64> %b, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
+ ret <2 x i64> %s
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/pr2326.ll b/test/CodeGen/X86/pr2326.ll
index 9cf83bb..88c7bb5 100644
--- a/test/CodeGen/X86/pr2326.ll
+++ b/test/CodeGen/X86/pr2326.ll
@@ -17,7 +17,7 @@ entry:
%tmp25 = and i1 %toBool23, %toBool24 ; <i1> [#uses=1]
%tmp2526 = zext i1 %tmp25 to i8 ; <i8> [#uses=1]
%tmp252627 = zext i8 %tmp2526 to i32 ; <i32> [#uses=1]
- %tmp29 = call i32 (...)* @func_15( i32 %tmp252627, i32 0 ) nounwind ; <i32> [#uses=0]
+ %tmp29 = call i32 (...) @func_15( i32 %tmp252627, i32 0 ) nounwind ; <i32> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
index 6f31c5f..9a162d7 100644
--- a/test/CodeGen/X86/pr2656.ll
+++ b/test/CodeGen/X86/pr2656.ll
@@ -19,7 +19,7 @@ entry:
%conv = fpext float %neg to double ; <double> [#uses=1]
%neg4 = fsub float -0.000000e+00, %tmp3 ; <float> [#uses=1]
%conv5 = fpext float %neg4 to double ; <double> [#uses=1]
- %call = call i32 (...)* @printf( i8* getelementptr ([17 x i8], [17 x i8]* @.str, i32 0, i32 0), double %conv, double %conv5 ) ; <i32> [#uses=0]
+ %call = call i32 (...) @printf( i8* getelementptr ([17 x i8], [17 x i8]* @.str, i32 0, i32 0), double %conv, double %conv5 ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/pr2982.ll b/test/CodeGen/X86/pr2982.ll
index ab46005..b7902b8 100644
--- a/test/CodeGen/X86/pr2982.ll
+++ b/test/CodeGen/X86/pr2982.ll
@@ -20,7 +20,7 @@ entry:
%5 = sext i8 %4 to i32 ; <i32> [#uses=1]
%6 = add i32 %2, %3 ; <i32> [#uses=1]
%7 = add i32 %6, %5 ; <i32> [#uses=1]
- %8 = tail call i32 (...)* @rshift_u_u(i32 %7, i32 0) nounwind
+ %8 = tail call i32 (...) @rshift_u_u(i32 %7, i32 0) nounwind
; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/pr3244.ll b/test/CodeGen/X86/pr3244.ll
index b08a223..c6419d8 100644
--- a/test/CodeGen/X86/pr3244.ll
+++ b/test/CodeGen/X86/pr3244.ll
@@ -10,7 +10,7 @@ entry:
%1 = load i32, i32* @g_487, align 4 ; <i32> [#uses=1]
%2 = trunc i16 %0 to i8 ; <i8> [#uses=1]
%3 = trunc i32 %1 to i8 ; <i8> [#uses=1]
- %4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1)
+ %4 = tail call i32 (...) @func_7(i64 -4455561449541442965, i32 1)
nounwind ; <i32> [#uses=1]
%5 = trunc i32 %4 to i8 ; <i8> [#uses=1]
%6 = mul i8 %3, %2 ; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/pr3250.ll b/test/CodeGen/X86/pr3250.ll
index cccbf54..4ab989e 100644
--- a/test/CodeGen/X86/pr3250.ll
+++ b/test/CodeGen/X86/pr3250.ll
@@ -5,7 +5,7 @@ declare i32 @safe_sub_func_short_u_u(i16 signext, i16 signext) nounwind
define i32 @func_106(i32 %p_107) nounwind {
entry:
- %0 = tail call i32 (...)* @safe_div_(i32 %p_107, i32 1) nounwind
+ %0 = tail call i32 (...) @safe_div_(i32 %p_107, i32 1) nounwind
; <i32> [#uses=1]
%1 = lshr i32 %0, -9 ; <i32> [#uses=1]
%2 = trunc i32 %1 to i16 ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/pr3457.ll b/test/CodeGen/X86/pr3457.ll
index 7264bcd..d4c0020 100644
--- a/test/CodeGen/X86/pr3457.ll
+++ b/test/CodeGen/X86/pr3457.ll
@@ -4,8 +4,8 @@
define void @foo(double* nocapture %P) nounwind {
entry:
- %0 = tail call double (...)* @test() nounwind ; <double> [#uses=2]
- %1 = tail call double (...)* @test() nounwind ; <double> [#uses=2]
+ %0 = tail call double (...) @test() nounwind ; <double> [#uses=2]
+ %1 = tail call double (...) @test() nounwind ; <double> [#uses=2]
%2 = fmul double %0, %0 ; <double> [#uses=1]
%3 = fmul double %1, %1 ; <double> [#uses=1]
%4 = fadd double %2, %3 ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/rd-mod-wr-eflags.ll b/test/CodeGen/X86/rd-mod-wr-eflags.ll
index afa1962..9723721 100644
--- a/test/CodeGen/X86/rd-mod-wr-eflags.ll
+++ b/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -39,7 +39,7 @@ store i64 %dec.i, i64* @c, align 8
%tobool.i = icmp ne i64 %dec.i, 0
%lor.ext.i = zext i1 %tobool.i to i32
store i32 %lor.ext.i, i32* @a, align 4
-%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+%call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
ret i32 0
}
@@ -53,7 +53,7 @@ store i64 %dec.i, i64* @c, align 8
%tobool.i = icmp ne i64 %0, 0
%lor.ext.i = zext i1 %tobool.i to i32
store i32 %lor.ext.i, i32* @a, align 4
-%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+%call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
ret i32 0
}
diff --git a/test/CodeGen/X86/recip-fastmath.ll b/test/CodeGen/X86/recip-fastmath.ll
index 83b86ac..fcd0770 100644
--- a/test/CodeGen/X86/recip-fastmath.ll
+++ b/test/CodeGen/X86/recip-fastmath.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+use-recip-est,+avx -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est | FileCheck %s --check-prefix=RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
; If the target's divss/divps instructions are substantially
; slower than rcpss/rcpps with a Newton-Raphson refinement,
@@ -20,13 +20,13 @@ define float @reciprocal_estimate(float %x) #0 {
; CHECK-NEXT: movaps
; CHECK-NEXT: retq
-; BTVER2-LABEL: reciprocal_estimate:
-; BTVER2: vrcpss
-; BTVER2: vmulss
-; BTVER2: vsubss
-; BTVER2: vmulss
-; BTVER2: vaddss
-; BTVER2-NEXT: retq
+; RECIP-LABEL: reciprocal_estimate:
+; RECIP: vrcpss
+; RECIP: vmulss
+; RECIP: vsubss
+; RECIP: vmulss
+; RECIP: vaddss
+; RECIP-NEXT: retq
; REFINE-LABEL: reciprocal_estimate:
; REFINE: vrcpss
@@ -51,13 +51,13 @@ define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
; CHECK-NEXT: movaps
; CHECK-NEXT: retq
-; BTVER2-LABEL: reciprocal_estimate_v4f32:
-; BTVER2: vrcpps
-; BTVER2: vmulps
-; BTVER2: vsubps
-; BTVER2: vmulps
-; BTVER2: vaddps
-; BTVER2-NEXT: retq
+; RECIP-LABEL: reciprocal_estimate_v4f32:
+; RECIP: vrcpps
+; RECIP: vmulps
+; RECIP: vsubps
+; RECIP: vmulps
+; RECIP: vaddps
+; RECIP-NEXT: retq
; REFINE-LABEL: reciprocal_estimate_v4f32:
; REFINE: vrcpps
@@ -85,13 +85,13 @@ define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
; CHECK-NEXT: movaps
; CHECK-NEXT: retq
-; BTVER2-LABEL: reciprocal_estimate_v8f32:
-; BTVER2: vrcpps
-; BTVER2: vmulps
-; BTVER2: vsubps
-; BTVER2: vmulps
-; BTVER2: vaddps
-; BTVER2-NEXT: retq
+; RECIP-LABEL: reciprocal_estimate_v8f32:
+; RECIP: vrcpps
+; RECIP: vmulps
+; RECIP: vsubps
+; RECIP: vmulps
+; RECIP: vaddps
+; RECIP-NEXT: retq
; REFINE-LABEL: reciprocal_estimate_v8f32:
; REFINE: vrcpps
diff --git a/test/CodeGen/X86/scalarize-bitcast.ll b/test/CodeGen/X86/scalarize-bitcast.ll
index 6de511f..60650f4 100644
--- a/test/CodeGen/X86/scalarize-bitcast.ll
+++ b/test/CodeGen/X86/scalarize-bitcast.ll
@@ -21,7 +21,7 @@ entry:
%tmp24.i = extractelement <1 x i64> %tmp10.i, i32 0 ; <i64> [#uses=1]
%tmp10 = bitcast i64 %tmp24.i to <1 x i64> ; <<1 x i64>> [#uses=1]
%tmp7 = extractelement <1 x i64> %tmp10, i32 0 ; <i64> [#uses=1]
- %call6 = tail call i32 (...)* @store8888(i64 %tmp7) ; <i32> [#uses=1]
+ %call6 = tail call i32 (...) @store8888(i64 %tmp7) ; <i32> [#uses=1]
store i32 %call6, i32* %src
ret void
}
diff --git a/test/CodeGen/X86/scheduler-backtracking.ll b/test/CodeGen/X86/scheduler-backtracking.ll
new file mode 100644
index 0000000..98471ee
--- /dev/null
+++ b/test/CodeGen/X86/scheduler-backtracking.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=x86-64 < %s -pre-RA-sched=list-ilp | FileCheck %s
+; RUN: llc -march=x86-64 < %s -pre-RA-sched=list-hybrid | FileCheck %s
+; RUN: llc -march=x86-64 < %s -pre-RA-sched=source | FileCheck %s
+; RUN: llc -march=x86-64 < %s -pre-RA-sched=list-burr | FileCheck %s
+; RUN: llc -march=x86-64 < %s -pre-RA-sched=linearize | FileCheck %s
+
+; PR22304 https://llvm.org/bugs/show_bug.cgi?id=22304
+; Tests checking backtracking in source scheduler. llc used to crash on them.
+
+; CHECK-LABEL: test1
+define i256 @test1(i256 %a) {
+ %b = add i256 %a, 1
+ %m = shl i256 %b, 1
+ %p = add i256 %m, 1
+ %v = lshr i256 %b, %p
+ %t = trunc i256 %v to i1
+ %c = shl i256 1, %p
+ %f = select i1 %t, i256 undef, i256 %c
+ ret i256 %f
+}
+
+; CHECK-LABEL: test2
+define i256 @test2(i256 %a) {
+ %b = sub i256 0, %a
+ %c = and i256 %b, %a
+ %d = call i256 @llvm.ctlz.i256(i256 %c, i1 false)
+ ret i256 %d
+}
+
+; CHECK-LABEL: test3
+define i256 @test3(i256 %n) {
+ %m = sub i256 -1, %n
+ %x = sub i256 0, %n
+ %y = and i256 %x, %m
+ %z = call i256 @llvm.ctlz.i256(i256 %y, i1 false)
+ ret i256 %z
+}
+
+declare i256 @llvm.ctlz.i256(i256, i1) nounwind readnone
+
+; CHECK-LABEL: test4
+define i64 @test4(i64 %a, i64 %b) {
+ %r = zext i64 %b to i256
+ %u = add i256 %r, 1
+ %w = and i256 %u, 1461501637330902918203684832716283019655932542975
+ %x = zext i64 %a to i256
+ %c = icmp uge i256 %w, %x
+ %y = select i1 %c, i64 0, i64 1
+ %z = add i64 %y, 1
+ ret i64 %z
+}
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index 4127288..55eaab9 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -11,16 +11,16 @@
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -verify-machineinstrs | FileCheck %s -check-prefix=X64-MinGW
; We used to crash with filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-dragonfly -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-dragonfly -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=i686-dragonfly -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-dragonfly -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -filetype=obj -o /dev/null
; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris 2> %t.log
; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-Solaris
diff --git a/test/CodeGen/X86/seh-safe-div.ll b/test/CodeGen/X86/seh-safe-div.ll
index 477ad36..ba54f1c 100644
--- a/test/CodeGen/X86/seh-safe-div.ll
+++ b/test/CodeGen/X86/seh-safe-div.ll
@@ -173,15 +173,15 @@ define i32 @main() {
store i32 10, i32* %n.addr, align 4
store i32 2, i32* %d.addr, align 4
%r1 = call i32 @safe_div(i32* %n.addr, i32* %d.addr)
- call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8], [21 x i8]* @str_result, i32 0, i32 0), i32 %r1)
+ call void (i8*, ...) @printf(i8* getelementptr ([21 x i8], [21 x i8]* @str_result, i32 0, i32 0), i32 %r1)
store i32 10, i32* %n.addr, align 4
store i32 0, i32* %d.addr, align 4
%r2 = call i32 @safe_div(i32* %n.addr, i32* %d.addr)
- call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8], [21 x i8]* @str_result, i32 0, i32 0), i32 %r2)
+ call void (i8*, ...) @printf(i8* getelementptr ([21 x i8], [21 x i8]* @str_result, i32 0, i32 0), i32 %r2)
%r3 = call i32 @safe_div(i32* %n.addr, i32* null)
- call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8], [21 x i8]* @str_result, i32 0, i32 0), i32 %r3)
+ call void (i8*, ...) @printf(i8* getelementptr ([21 x i8], [21 x i8]* @str_result, i32 0, i32 0), i32 %r3)
ret i32 0
}
diff --git a/test/CodeGen/X86/setcc.ll b/test/CodeGen/X86/setcc.ll
index 2454af9..6f1ddbd 100644
--- a/test/CodeGen/X86/setcc.ll
+++ b/test/CodeGen/X86/setcc.ll
@@ -29,7 +29,7 @@ define i64 @t3(i64 %x) nounwind readnone ssp {
entry:
; CHECK-LABEL: t3:
; CHECK: sbbq %rax, %rax
-; CHECK: andq $64, %rax
+; CHECK: andl $64, %eax
%0 = icmp ult i64 %x, 18 ; <i1> [#uses=1]
%iftmp.2.0 = select i1 %0, i64 64, i64 0 ; <i64> [#uses=1]
ret i64 %iftmp.2.0
diff --git a/test/CodeGen/X86/shift-pair.ll b/test/CodeGen/X86/shift-pair.ll
index 24ba1fc..62e51f0 100644
--- a/test/CodeGen/X86/shift-pair.ll
+++ b/test/CodeGen/X86/shift-pair.ll
@@ -3,7 +3,7 @@
define i64 @test(i64 %A) {
; CHECK: @test
; CHECK: shrq $54
-; CHECK: andq $1020
+; CHECK: andl $1020
; CHECK: ret
%B = lshr i64 %A, 56
%C = shl i64 %B, 2
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index d32e567..b94960a 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -349,7 +349,7 @@ entry:
; X32ABI-LABEL: t17:
; X32ABI: xorl %eax, %eax
; X32ABI: jmp {{_?}}bar5
- tail call void (...)* @bar5() nounwind
+ tail call void (...) @bar5() nounwind
ret void
}
@@ -369,7 +369,7 @@ entry:
; X32ABI-LABEL: t18:
; X32ABI: xorl %eax, %eax
; X32ABI: jmp {{_?}}bar6
- %0 = tail call double (...)* @bar6() nounwind
+ %0 = tail call double (...) @bar6() nounwind
ret void
}
diff --git a/test/CodeGen/X86/smul-with-overflow.ll b/test/CodeGen/X86/smul-with-overflow.ll
index 55aa6aa..2b21f4f 100644
--- a/test/CodeGen/X86/smul-with-overflow.ll
+++ b/test/CodeGen/X86/smul-with-overflow.ll
@@ -11,11 +11,11 @@ entry:
br i1 %obit, label %overflow, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
; CHECK-LABEL: test1:
; CHECK: imull
@@ -30,11 +30,11 @@ entry:
br i1 %obit, label %overflow, label %normal
overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
; CHECK-LABEL: test2:
; CHECK: imull
diff --git a/test/CodeGen/X86/sqrt-fastmath.ll b/test/CodeGen/X86/sqrt-fastmath.ll
index 24b175e..4c6b521 100644
--- a/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/test/CodeGen/X86/sqrt-fastmath.ll
@@ -1,132 +1,141 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-sqrt-est | FileCheck %s --check-prefix=ESTIMATE
-; generated using "clang -S -O2 -ffast-math -emit-llvm sqrt.c" from
-; #include <math.h>
-;
-; double fd(double d){
-; return sqrt(d);
-; }
-;
-; float ff(float f){
-; return sqrtf(f);
-; }
-;
-; long double fld(long double ld){
-; return sqrtl(ld);
-; }
-;
-; Tests conversion of sqrt function calls into sqrt instructions when
-; -ffast-math is in effect.
+declare double @__sqrt_finite(double) #0
+declare float @__sqrtf_finite(float) #0
+declare x86_fp80 @__sqrtl_finite(x86_fp80) #0
+declare float @llvm.sqrt.f32(float) #0
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
-; ModuleID = 'sqrt.c'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-; Function Attrs: nounwind readnone uwtable
define double @fd(double %d) #0 {
-entry:
-; CHECK: sqrtsd
- %call = tail call double @__sqrt_finite(double %d) #2
+; CHECK-LABEL: fd:
+; CHECK: # BB#0:
+; CHECK-NEXT: sqrtsd %xmm0, %xmm0
+; CHECK-NEXT: retq
+;
+; ESTIMATE-LABEL: fd:
+; ESTIMATE: # BB#0:
+; ESTIMATE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
+; ESTIMATE-NEXT: retq
+ %call = tail call double @__sqrt_finite(double %d) #1
ret double %call
}
-; Function Attrs: nounwind readnone
-declare double @__sqrt_finite(double) #1
-; Function Attrs: nounwind readnone uwtable
define float @ff(float %f) #0 {
-entry:
-; CHECK: sqrtss
- %call = tail call float @__sqrtf_finite(float %f) #2
+; CHECK-LABEL: ff:
+; CHECK: # BB#0:
+; CHECK-NEXT: sqrtss %xmm0, %xmm0
+; CHECK-NEXT: retq
+;
+; ESTIMATE-LABEL: ff:
+; ESTIMATE: # BB#0:
+; ESTIMATE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
+; ESTIMATE-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm2
+; ESTIMATE-NEXT: vmulss %xmm1, %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm1
+; ESTIMATE-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulss %xmm1, %xmm0, %xmm1
+; ESTIMATE-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; ESTIMATE-NEXT: vcmpeqss %xmm2, %xmm0, %xmm0
+; ESTIMATE-NEXT: vandnps %xmm1, %xmm0, %xmm0
+; ESTIMATE-NEXT: retq
+ %call = tail call float @__sqrtf_finite(float %f) #1
ret float %call
}
-; Function Attrs: nounwind readnone
-declare float @__sqrtf_finite(float) #1
-; Function Attrs: nounwind readnone uwtable
define x86_fp80 @fld(x86_fp80 %ld) #0 {
-entry:
-; CHECK: fsqrt
- %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2
+; CHECK-LABEL: fld:
+; CHECK: # BB#0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fsqrt
+; CHECK-NEXT: retq
+;
+; ESTIMATE-LABEL: fld:
+; ESTIMATE: # BB#0:
+; ESTIMATE-NEXT: fldt {{[0-9]+}}(%rsp)
+; ESTIMATE-NEXT: fsqrt
+; ESTIMATE-NEXT: retq
+ %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #1
ret x86_fp80 %call
}
-declare x86_fp80 @__sqrtl_finite(x86_fp80) #1
-
-declare float @llvm.sqrt.f32(float) #1
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #1
-declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #1
-; If the target's sqrtss and divss instructions are substantially
-; slower than rsqrtss with a Newton-Raphson refinement, we should
-; generate the estimate sequence.
define float @reciprocal_square_root(float %x) #0 {
+; CHECK-LABEL: reciprocal_square_root:
+; CHECK: # BB#0:
+; CHECK-NEXT: sqrtss %xmm0, %xmm1
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: divss %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; ESTIMATE-LABEL: reciprocal_square_root:
+; ESTIMATE: # BB#0:
+; ESTIMATE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
+; ESTIMATE-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm2
+; ESTIMATE-NEXT: vmulss %xmm1, %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; ESTIMATE-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
+; ESTIMATE-NEXT: vmulss %xmm2, %xmm0, %xmm0
+; ESTIMATE-NEXT: retq
%sqrt = tail call float @llvm.sqrt.f32(float %x)
%div = fdiv fast float 1.0, %sqrt
ret float %div
-
-; CHECK-LABEL: reciprocal_square_root:
-; CHECK: sqrtss
-; CHECK-NEXT: movss
-; CHECK-NEXT: divss
-; CHECK-NEXT: retq
-; BTVER2-LABEL: reciprocal_square_root:
-; BTVER2: vrsqrtss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vaddss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: retq
}
define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: reciprocal_square_root_v4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: sqrtps %xmm0, %xmm1
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; CHECK-NEXT: divps %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; ESTIMATE-LABEL: reciprocal_square_root_v4f32:
+; ESTIMATE: # BB#0:
+; ESTIMATE-NEXT: vrsqrtps %xmm0, %xmm1
+; ESTIMATE-NEXT: vmulps %xmm1, %xmm1, %xmm2
+; ESTIMATE-NEXT: vmulps %xmm0, %xmm2, %xmm0
+; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; ESTIMATE-NEXT: retq
%sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
ret <4 x float> %div
-
-; CHECK-LABEL: reciprocal_square_root_v4f32:
-; CHECK: sqrtps
-; CHECK-NEXT: movaps
-; CHECK-NEXT: divps
-; CHECK-NEXT: retq
-; BTVER2-LABEL: reciprocal_square_root_v4f32:
-; BTVER2: vrsqrtps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vaddps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: retq
}
define <8 x float> @reciprocal_square_root_v8f32(<8 x float> %x) #0 {
+; CHECK-LABEL: reciprocal_square_root_v8f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: sqrtps %xmm1, %xmm2
+; CHECK-NEXT: sqrtps %xmm0, %xmm3
+; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: divps %xmm3, %xmm0
+; CHECK-NEXT: divps %xmm2, %xmm1
+; CHECK-NEXT: retq
+;
+; ESTIMATE-LABEL: reciprocal_square_root_v8f32:
+; ESTIMATE: # BB#0:
+; ESTIMATE-NEXT: vrsqrtps %ymm0, %ymm1
+; ESTIMATE-NEXT: vmulps %ymm1, %ymm1, %ymm2
+; ESTIMATE-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %ymm0, %ymm0
+; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; ESTIMATE-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; ESTIMATE-NEXT: retq
%sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
ret <8 x float> %div
-
-; CHECK-LABEL: reciprocal_square_root_v8f32:
-; CHECK: sqrtps
-; CHECK-NEXT: sqrtps
-; CHECK-NEXT: movaps
-; CHECK-NEXT: movaps
-; CHECK-NEXT: divps
-; CHECK-NEXT: divps
-; CHECK-NEXT: retq
-; BTVER2-LABEL: reciprocal_square_root_v8f32:
-; BTVER2: vrsqrtps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vaddps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: retq
}
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
+attributes #0 = { "unsafe-fp-math"="true" }
+attributes #1 = { nounwind readnone }
+
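(Note on the ESTIMATE patterns above: vrsqrtss/vrsqrtps give an estimate x0 ~= 1/sqrt(a), and the following multiply/add chain applies one Newton-Raphson step, x1 = 0.5 * x0 * (3 - a*x0*x0); the vmulss/vmulps against memory operands in the checks load the constant factors from .rodata. A minimal IR sketch of the scalar step, assuming fast-math; the names are illustrative, not from the patch:

define float @newton_raphson_rsqrt_step(float %a, float %x0) {
  ; one refinement step: x1 = 0.5 * x0 * (3 - a*x0*x0)
  %x0sq = fmul fast float %x0, %x0
  %ax2  = fmul fast float %a, %x0sq
  %t    = fsub fast float 3.0, %ax2
  %hx0  = fmul fast float %x0, 0.5
  %x1   = fmul fast float %hx0, %t
  ret float %x1
}

The extra vcmpeqss/vandnps in the ff() check guards the a == 0 case, since the refined 1/sqrt(0) would otherwise be multiplied back by 0*inf.)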
diff --git a/test/CodeGen/X86/sret-implicit.ll b/test/CodeGen/X86/sret-implicit.ll
index 3fade1d..5680952 100644
--- a/test/CodeGen/X86/sret-implicit.ll
+++ b/test/CodeGen/X86/sret-implicit.ll
@@ -1,5 +1,7 @@
; RUN: llc -mtriple=x86_64-apple-darwin8 < %s | FileCheck %s
; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin8 -terminal-rule < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux -terminal-rule < %s | FileCheck %s
; CHECK-LABEL: return32
; CHECK-DAG: movq $0, (%rdi)
diff --git a/test/CodeGen/X86/sse-varargs.ll b/test/CodeGen/X86/sse-varargs.ll
index da38f0e..7c3c781 100644
--- a/test/CodeGen/X86/sse-varargs.ll
+++ b/test/CodeGen/X86/sse-varargs.ll
@@ -2,7 +2,7 @@
define i32 @t() nounwind {
entry:
- tail call void (i32, ...)* @foo( i32 1, <4 x i32> < i32 10, i32 11, i32 12, i32 13 > ) nounwind
+ tail call void (i32, ...) @foo( i32 1, <4 x i32> < i32 10, i32 11, i32 12, i32 13 > ) nounwind
ret i32 0
}
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll
index cab62a3..5afebd2 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -581,7 +581,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK: test_x86_sse2_storel_dq
; CHECK: movl
- ; CHECK: movq
+ ; CHECK: movlps
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
ret void
}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index ca13392..3bde991 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -1026,29 +1026,24 @@ define <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) {
}
; Edge case for insertps where we end up with a shuffle with mask=<0, 7, -1, -1>
-define void @insertps_pr20411(i32* noalias nocapture %RET) #1 {
+define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32* noalias nocapture %RET) #1 {
; X32-LABEL: insertps_pr20411:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
-; X32-NEXT: pshufd {{.*#+}} xmm1 = mem[3,1,2,3]
-; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X32-NEXT: movdqu %xmm1, (%eax)
; X32-NEXT: retl
;
-; X64-LABEL: insertps_pr20411:
-; X64: ## BB#0:
-; X64-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
-; X64-NEXT: pshufd {{.*#+}} xmm1 = mem[3,1,2,3]
-; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
-; X64-NEXT: movdqu %xmm1, (%rdi)
+; X64-LABEL: insertps_pr20411:
+; X64: ## BB#0:
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; X64-NEXT: movdqu %xmm1, (%rdi)
; X64-NEXT: retq
- %gather_load = shufflevector <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %shuffle109 = shufflevector <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; 4 5 6 7
- %shuffle116 = shufflevector <8 x i32> %gather_load, <8 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef> ; 3 x x x
- %shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 4, i32 3, i32 undef, i32 undef> ; 3 7 x x
- %ptrcast = bitcast i32* %RET to <4 x i32>*
- store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4
+ %shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 0, i32 7, i32 undef, i32 undef>
+ %ptrcast = bitcast i32* %RET to <4 x i32>*
+ store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4
ret void
}
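(Note on the insertps_pr20411 edge case above: in a shufflevector mask over two <4 x i32> operands, indices 0-3 select from the first operand and 4-7 from the second, so the mask <0, 7, undef, undef> named in the comment means "element 0 of %shuffle109, element 3 of %shuffle116". A self-contained sketch of just that semantic, with illustrative names:

define <4 x i32> @shuffle_mask_0_7(<4 x i32> %x, <4 x i32> %y) {
  ; lane 0 <- %x[0], lane 1 <- %y[3] (index 7 = 4 + 3), lanes 2-3 undef
  %r = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 7, i32 undef, i32 undef>
  ret <4 x i32> %r
}
)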
diff --git a/test/CodeGen/X86/stack-folding-3dnow.ll b/test/CodeGen/X86/stack-folding-3dnow.ll
new file mode 100644
index 0000000..955bf44
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-3dnow.ll
@@ -0,0 +1,217 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnow | FileCheck %s
+
+define x86_mmx @stack_fold_pavgusb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pavgusb
+ ;CHECK: pavgusb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pf2id(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pf2id
+ ;CHECK: pf2id {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pf2iw(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pf2iw
+ ;CHECK: pf2iw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfacc(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfacc
+ ;CHECK: pfacc {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfadd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfadd
+ ;CHECK: pfadd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfcmpeq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfcmpeq
+ ;CHECK: pfcmpeq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfcmpge(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfcmpge
+ ;CHECK: pfcmpge {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfcmpgt(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfcmpgt
+ ;CHECK: pfcmpgt {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfmax(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfmax
+ ;CHECK: pfmax {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfmin(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfmin
+ ;CHECK: pfmin {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfmul(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfmul
+ ;CHECK: pfmul {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfnacc(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfnacc
+ ;CHECK: pfnacc {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfpnacc(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfpnacc
+ ;CHECK: pfpnacc {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfrcp(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pfrcp
+ ;CHECK: pfrcp {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfrcpit1(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfrcpit1
+ ;CHECK: pfrcpit1 {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfrcpit2(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfrcpit2
+ ;CHECK: pfrcpit2 {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfrsqit1(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfrsqit1
+ ;CHECK: pfrsqit1 {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfrsqrt(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pfrsqrt
+ ;CHECK: pfrsqrt {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfsub(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfsub
+ ;CHECK: pfsub {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfsubr(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfsubr
+ ;CHECK: pfsubr {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pi2fd(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pi2fd
+ ;CHECK: pi2fd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pi2fw(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pi2fw
+ ;CHECK: pi2fw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmulhrw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmulhrw
+ ;CHECK: pmulhrw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pswapd(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pswapd
+ ;CHECK: pswapd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone
diff --git a/test/CodeGen/X86/stack-folding-int-avx1.ll b/test/CodeGen/X86/stack-folding-int-avx1.ll
index a9a21c2..fec297d 100644
--- a/test/CodeGen/X86/stack-folding-int-avx1.ll
+++ b/test/CodeGen/X86/stack-folding-int-avx1.ll
@@ -87,15 +87,19 @@ define <2 x i64> @stack_fold_movq_load(<2 x i64> %a0) {
;CHECK: movq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
- ret <2 x i64> %2
+ ; add forces execution domain
+ %3 = add <2 x i64> %2, <i64 1, i64 1>
+ ret <2 x i64> %3
}
define i64 @stack_fold_movq_store(<2 x i64> %a0) {
;CHECK-LABEL: stack_fold_movq_store
;CHECK: movq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
- %1 = extractelement <2 x i64> %a0, i32 0
- %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
- ret i64 %1
+ ; add forces execution domain
+ %1 = add <2 x i64> %a0, <i64 1, i64 1>
+ %2 = extractelement <2 x i64> %1, i32 0
+ %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i64 %2
}
define <8 x i16> @stack_fold_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
diff --git a/test/CodeGen/X86/stack-folding-int-sse42.ll b/test/CodeGen/X86/stack-folding-int-sse42.ll
index 6aa2601..e814ae6 100644
--- a/test/CodeGen/X86/stack-folding-int-sse42.ll
+++ b/test/CodeGen/X86/stack-folding-int-sse42.ll
@@ -62,6 +62,33 @@ define <2 x i64> @stack_fold_aeskeygenassist(<2 x i64> %a0) {
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
+;TODO stack_fold_crc32_32_8
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+
+;TODO stack_fold_crc32_32_16
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+
+define i32 @stack_fold_crc32_32_32(i32 %a0, i32 %a1) {
+ ;CHECK-LABEL: stack_fold_crc32_32_32
+ ;CHECK: crc32l {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
+
+;TODO stack_fold_crc32_64_8
+declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
+
+define i64 @stack_fold_crc32_64_64(i64 %a0, i64 %a1) {
+ ;CHECK-LABEL: stack_fold_crc32_64_64
+ ;CHECK: crc32q {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
+ ret i64 %2
+}
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
+
define <4 x i32> @stack_fold_movd_load(i32 %a0) {
;CHECK-LABEL: stack_fold_movd_load
;CHECK: movd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
@@ -87,15 +114,19 @@ define <2 x i64> @stack_fold_movq_load(<2 x i64> %a0) {
;CHECK: movq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
- ret <2 x i64> %2
+ ; add forces execution domain
+ %3 = add <2 x i64> %2, <i64 1, i64 1>
+ ret <2 x i64> %3
}
define i64 @stack_fold_movq_store(<2 x i64> %a0) {
;CHECK-LABEL: stack_fold_movq_store
;CHECK: movq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
- %1 = extractelement <2 x i64> %a0, i32 0
- %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
- ret i64 %1
+ ; add forces execution domain
+ %1 = add <2 x i64> %a0, <i64 1, i64 1>
+ %2 = extractelement <2 x i64> %1, i32 0
+ %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i64 %2
}
define <8 x i16> @stack_fold_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
diff --git a/test/CodeGen/X86/stack-folding-mmx.ll b/test/CodeGen/X86/stack-folding-mmx.ll
new file mode 100644
index 0000000..8a5d4e2
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-mmx.ll
@@ -0,0 +1,566 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s
+
+define x86_mmx @stack_fold_cvtpd2pi(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtpd2pi
+ ;CHECK: cvtpd2pi {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
+
+define <2 x double> @stack_fold_cvtpi2pd(x86_mmx %a0) {
+ ;CHECK-LABEL: stack_fold_cvtpi2pd
+ ;CHECK: cvtpi2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0) nounwind readnone
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
+
+define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, x86_mmx %a1) {
+ ;CHECK-LABEL: stack_fold_cvtpi2ps
+ ;CHECK: cvtpi2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %a1) nounwind readnone
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_cvtps2pi(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtps2pi
+ ;CHECK: cvtps2pi {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone
+
+define x86_mmx @stack_fold_cvttpd2pi(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttpd2pi
+ ;CHECK: cvttpd2pi {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
+
+define x86_mmx @stack_fold_cvttps2pi(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttps2pi
+ ;CHECK: cvttps2pi {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone
+
+; TODO stack_fold_movd_load
+; TODO stack_fold_movd_store
+; TODO stack_fold_movq_load
+; TODO stack_fold_movq_store
+
+define x86_mmx @stack_fold_packssdw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_packssdw
+ ;CHECK: packssdw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_packsswb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_packsswb
+ ;CHECK: packsswb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_packuswb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_packuswb
+ ;CHECK: packuswb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddb
+ ;CHECK: paddb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddd
+ ;CHECK: paddd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddq
+ ;CHECK: paddq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddsb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddsb
+ ;CHECK: paddsb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddsw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddsw
+ ;CHECK: paddsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddusb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddusb
+ ;CHECK: paddusb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddusw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddusw
+ ;CHECK: paddusw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddw
+ ;CHECK: paddw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pand(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pand
+ ;CHECK: pand {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pandn(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pandn
+ ;CHECK: pandn {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pavgb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pavgb
+ ;CHECK: pavgb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pavgw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pavgw
+ ;CHECK: pavgw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pcmpeqb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pcmpeqb
+ ;CHECK: pcmpeqb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pcmpeqd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pcmpeqd
+ ;CHECK: pcmpeqd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pcmpeqw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pcmpeqw
+ ;CHECK: pcmpeqw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pcmpgtb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pcmpgtb
+ ;CHECK: pcmpgtb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pcmpgtd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pcmpgtd
+ ;CHECK: pcmpgtd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pcmpgtw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pcmpgtw
+ ;CHECK: pcmpgtw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
+
+; TODO stack_fold_pinsrw
+
+define x86_mmx @stack_fold_pmaddwd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmaddwd
+ ;CHECK: pmaddwd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmaxsw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmaxsw
+ ;CHECK: pmaxsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmaxub(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmaxub
+ ;CHECK: pmaxub {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pminsw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pminsw
+ ;CHECK: pminsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pminub(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pminub
+ ;CHECK: pminub {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmulhuw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmulhuw
+ ;CHECK: pmulhuw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmulhw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmulhw
+ ;CHECK: pmulhw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmullw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmullw
+ ;CHECK: pmullw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmuludq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmuludq
+ ;CHECK: pmuludq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_por(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_por
+ ;CHECK: por {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psadbw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psadbw
+ ;CHECK: psadbw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pshufw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pshufw
+ ;CHECK: pshufw $1, {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %a, i8 1) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
+
+define x86_mmx @stack_fold_pslld(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pslld
+ ;CHECK: pslld {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psllq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psllq
+ ;CHECK: psllq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psllw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psllw
+ ;CHECK: psllw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psrad(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psrad
+ ;CHECK: psrad {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psraw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psraw
+ ;CHECK: psraw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psrld(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psrld
+ ;CHECK: psrld {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psrlq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psrlq
+ ;CHECK: psrlq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psrlw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psrlw
+ ;CHECK: psrlw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubb
+ ;CHECK: psubb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubd
+ ;CHECK: psubd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubq
+ ;CHECK: psubq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubsb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubsb
+ ;CHECK: psubsb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubsw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubsw
+ ;CHECK: psubsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubusb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubusb
+ ;CHECK: psubusb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubusw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubusw
+ ;CHECK: psubusw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubw
+ ;CHECK: psubw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_punpckhbw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_punpckhbw
+ ;CHECK: punpckhbw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_punpckhdq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_punpckhdq
+ ;CHECK: punpckhdq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_punpckhwd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_punpckhwd
+ ;CHECK: punpckhwd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_punpcklbw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_punpcklbw
+ ;CHECK: punpcklbw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_punpckldq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_punpckldq
+ ;CHECK: punpckldq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_punpcklwd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_punpcklwd
+ ;CHECK: punpcklwd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pxor(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pxor
+ ;CHECK: pxor {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
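(Note on the stack-fold-mmx tests added above: each one uses an inline asm "nop" that clobbers most of mm2-mm7 so the register allocator is forced to spill and reload an MMX value, and the CHECK pattern then verifies the reload is folded into the memory operand of the instruction under test instead of appearing as a separate load. A rough sketch of the shape these tests share, reusing one of the intrinsics already declared above:

define x86_mmx @stack_fold_sketch(x86_mmx %a, x86_mmx %b) {
  ; Clobbering mm2-mm7 creates enough register pressure to spill %a or %b.
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  ; The folded 8-byte reload is what each CHECK line above matches.
  %2 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
)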
diff --git a/test/CodeGen/X86/stack-protector-dbginfo.ll b/test/CodeGen/X86/stack-protector-dbginfo.ll
index 0a4a4f2..6275c8d 100644
--- a/test/CodeGen/X86/stack-protector-dbginfo.ll
+++ b/test/CodeGen/X86/stack-protector-dbginfo.ll
@@ -48,7 +48,7 @@ attributes #0 = { sspreq }
!20 = !{}
!21 = !{i32 2, !"Dwarf Version", i32 2}
!22 = !{i64* getelementptr inbounds ({ i64, [56 x i8] }, { i64, [56 x i8] }* @a, i32 0, i32 0)}
-!23 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "p2", line: 12, arg: 2, scope: !24, file: !10, type: !32, inlinedAt: !38)
+!23 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "p2", line: 12, arg: 2, scope: !24, file: !10, type: !32)
!24 = !MDSubprogram(name: "min<unsigned long long>", linkageName: "_ZN3__13minIyEERKT_S3_RS1_", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !1, scope: !25, type: !27, templateParams: !33, variables: !35)
!25 = !MDNamespace(name: "__1", line: 1, file: !26, scope: null)
!26 = !MDFile(filename: "main.cpp", directory: "/Users/matt/ryan_bug")
@@ -71,13 +71,12 @@ attributes #0 = { sspreq }
!43 = !{!29, !29, !32, !44}
!44 = !MDCompositeType(tag: DW_TAG_structure_type, name: "A", size: 8, align: 8, file: !1, scope: !25, elements: !45)
!45 = !{!46}
-!46 = !MDSubprogram(name: "operator()", linkageName: "_ZN3__11AclERKiS2_", line: 1, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !44, type: !47, variables: !52)
+!46 = !MDSubprogram(name: "operator()", linkageName: "_ZN3__11AclERKiS2_", line: 1, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !44, type: !47)
!47 = !MDSubroutineType(types: !48)
!48 = !{!13, !49, !50, !50}
!49 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !44)
!50 = !MDDerivedType(tag: DW_TAG_reference_type, baseType: !51)
!51 = !MDDerivedType(tag: DW_TAG_const_type, baseType: !13)
-!52 = !{i32 786468}
!53 = !{!34, !54}
!54 = !MDTemplateTypeParameter(name: "_Compare", type: !44)
!55 = !{!56, !57, !58}
@@ -86,7 +85,7 @@ attributes #0 = { sspreq }
!58 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "p3", line: 8, arg: 3, scope: !41, file: !10, type: !44)
!59 = !MDLocation(line: 13, scope: !24, inlinedAt: !38)
!63 = !{i32 undef}
-!64 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 1, arg: 2, scope: !65, file: !10, type: !50, inlinedAt: !40)
+!64 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 1, arg: 2, scope: !65, file: !10, type: !50)
!65 = !MDSubprogram(name: "operator()", linkageName: "_ZN3__11AclERKiS2_", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !25, type: !47, declaration: !46, variables: !66)
!66 = !{!67, !69, !70}
!67 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !65, type: !68)
diff --git a/test/CodeGen/X86/stack-protector.ll b/test/CodeGen/X86/stack-protector.ll
index a88acf0..acaba6d 100644
--- a/test/CodeGen/X86/stack-protector.ll
+++ b/test/CodeGen/X86/stack-protector.ll
@@ -47,7 +47,7 @@ entry:
%0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -83,7 +83,7 @@ entry:
%0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -115,7 +115,7 @@ entry:
%0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -147,7 +147,7 @@ entry:
%0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -180,7 +180,7 @@ entry:
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%buf1 = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
%arraydecay2 = getelementptr inbounds [16 x i8], [16 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -214,7 +214,7 @@ entry:
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%buf1 = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
%arraydecay2 = getelementptr inbounds [16 x i8], [16 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -248,7 +248,7 @@ entry:
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%buf1 = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
%arraydecay2 = getelementptr inbounds [16 x i8], [16 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -282,7 +282,7 @@ entry:
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%buf1 = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
%arraydecay2 = getelementptr inbounds [16 x i8], [16 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -313,7 +313,7 @@ entry:
%0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%arraydecay1 = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -345,7 +345,7 @@ entry:
%0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%arraydecay1 = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -377,7 +377,7 @@ entry:
%0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%arraydecay1 = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -409,7 +409,7 @@ entry:
%0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%arraydecay1 = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -442,7 +442,7 @@ entry:
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%buf1 = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
%arraydecay2 = getelementptr inbounds [4 x i8], [4 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -476,7 +476,7 @@ entry:
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%buf1 = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
%arraydecay2 = getelementptr inbounds [4 x i8], [4 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -510,7 +510,7 @@ entry:
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%buf1 = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
%arraydecay2 = getelementptr inbounds [4 x i8], [4 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -544,7 +544,7 @@ entry:
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
%buf1 = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
%arraydecay2 = getelementptr inbounds [4 x i8], [4 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -571,7 +571,7 @@ entry:
%a.addr = alloca i8*, align 8
store i8* %a, i8** %a.addr, align 8
%0 = load i8*, i8** %a.addr, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
ret void
}
@@ -599,7 +599,7 @@ entry:
%a.addr = alloca i8*, align 8
store i8* %a, i8** %a.addr, align 8
%0 = load i8*, i8** %a.addr, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
ret void
}
@@ -627,7 +627,7 @@ entry:
%a.addr = alloca i8*, align 8
store i8* %a, i8** %a.addr, align 8
%0 = load i8*, i8** %a.addr, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
ret void
}
@@ -655,7 +655,7 @@ entry:
%a.addr = alloca i8*, align 8
store i8* %a, i8** %a.addr, align 8
%0 = load i8*, i8** %a.addr, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
ret void
}
@@ -808,7 +808,7 @@ entry:
; DARWIN-X64: .cfi_endproc
%a = alloca i32, align 4
%0 = ptrtoint i32* %a to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -835,7 +835,7 @@ entry:
; DARWIN-X64: .cfi_endproc
%a = alloca i32, align 4
%0 = ptrtoint i32* %a to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -862,7 +862,7 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca i32, align 4
%0 = ptrtoint i32* %a to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -889,7 +889,7 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca i32, align 4
%0 = ptrtoint i32* %a to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -1021,7 +1021,7 @@ entry:
store double %call, double* %x, align 8
%cmp2 = fcmp ogt double %call, 0.000000e+00
%y.1 = select i1 %cmp2, double* %x, double* null
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
ret void
}
@@ -1051,7 +1051,7 @@ entry:
store double %call, double* %x, align 8
%cmp2 = fcmp ogt double %call, 0.000000e+00
%y.1 = select i1 %cmp2, double* %x, double* null
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
ret void
}
@@ -1081,7 +1081,7 @@ entry:
store double %call, double* %x, align 8
%cmp2 = fcmp ogt double %call, 0.000000e+00
%y.1 = select i1 %cmp2, double* %x, double* null
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
ret void
}
@@ -1111,7 +1111,7 @@ entry:
store double %call, double* %x, align 8
%cmp2 = fcmp ogt double %call, 0.000000e+00
%y.1 = select i1 %cmp2, double* %x, double* null
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
ret void
}
@@ -1155,7 +1155,7 @@ if.then3: ; preds = %if.else
if.end4: ; preds = %if.else, %if.then3, %if.then
%y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0)
+ %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0)
ret void
}
@@ -1200,7 +1200,7 @@ if.then3: ; preds = %if.else
if.end4: ; preds = %if.else, %if.then3, %if.then
%y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0)
+ %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0)
ret void
}
@@ -1245,7 +1245,7 @@ if.then3: ; preds = %if.else
if.end4: ; preds = %if.else, %if.then3, %if.then
%y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0)
+ %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0)
ret void
}
@@ -1290,7 +1290,7 @@ if.then3: ; preds = %if.else
if.end4: ; preds = %if.else, %if.then3, %if.then
%y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0)
+ %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0)
ret void
}
@@ -1319,7 +1319,7 @@ entry:
%y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
store i32* %y, i32** %b, align 8
%0 = load i32*, i32** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
ret void
}
@@ -1349,7 +1349,7 @@ entry:
%y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
store i32* %y, i32** %b, align 8
%0 = load i32*, i32** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
ret void
}
@@ -1379,7 +1379,7 @@ entry:
%y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
store i32* %y, i32** %b, align 8
%0 = load i32*, i32** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
ret void
}
@@ -1409,7 +1409,7 @@ entry:
%y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
store i32* %y, i32** %b, align 8
%0 = load i32*, i32** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
ret void
}
@@ -1437,7 +1437,7 @@ entry:
%b = alloca i32*, align 8
%y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
%0 = ptrtoint i32* %y to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -1466,7 +1466,7 @@ entry:
%b = alloca i32*, align 8
%y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
%0 = ptrtoint i32* %y to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -1494,7 +1494,7 @@ entry:
%b = alloca i32*, align 8
%y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
%0 = ptrtoint i32* %y to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -1523,7 +1523,7 @@ entry:
%b = alloca i32*, align 8
%y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
%0 = ptrtoint i32* %y to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -1549,7 +1549,7 @@ entry:
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.pair, align 4
%y = getelementptr inbounds %struct.pair, %struct.pair* %c, i64 0, i32 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y)
ret void
}
@@ -1576,7 +1576,7 @@ entry:
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.pair, align 4
%y = getelementptr inbounds %struct.pair, %struct.pair* %c, i64 0, i32 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y)
ret void
}
@@ -1603,7 +1603,7 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%c = alloca %struct.pair, align 4
%y = getelementptr inbounds %struct.pair, %struct.pair* %c, i64 0, i32 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y)
ret void
}
@@ -1630,7 +1630,7 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%c = alloca %struct.pair, align 4
%y = getelementptr inbounds %struct.pair, %struct.pair* %c, i64 0, i32 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y)
ret void
}
@@ -1656,7 +1656,7 @@ entry:
; DARWIN-X64: .cfi_endproc
%a = alloca i32, align 4
%add.ptr5 = getelementptr inbounds i32, i32* %a, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
ret void
}
@@ -1683,7 +1683,7 @@ entry:
; DARWIN-X64: .cfi_endproc
%a = alloca i32, align 4
%add.ptr5 = getelementptr inbounds i32, i32* %a, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
ret void
}
@@ -1710,7 +1710,7 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca i32, align 4
%add.ptr5 = getelementptr inbounds i32, i32* %a, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
ret void
}
@@ -1737,7 +1737,7 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca i32, align 4
%add.ptr5 = getelementptr inbounds i32, i32* %a, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
ret void
}
@@ -1768,7 +1768,7 @@ entry:
%0 = bitcast i32* %a to float*
store float* %0, float** %b, align 8
%1 = load float*, float** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
ret void
}
@@ -1800,7 +1800,7 @@ entry:
%0 = bitcast i32* %a to float*
store float* %0, float** %b, align 8
%1 = load float*, float** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
ret void
}
@@ -1832,7 +1832,7 @@ entry:
%0 = bitcast i32* %a to float*
store float* %0, float** %b, align 8
%1 = load float*, float** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
ret void
}
@@ -1864,7 +1864,7 @@ entry:
%0 = bitcast i32* %a to float*
store float* %0, float** %b, align 8
%1 = load float*, float** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
ret void
}
@@ -2006,7 +2006,7 @@ entry:
%c = alloca %struct.vec, align 16
%y = getelementptr inbounds %struct.vec, %struct.vec* %c, i64 0, i32 0
%add.ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %y, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
ret void
}
@@ -2034,7 +2034,7 @@ entry:
%c = alloca %struct.vec, align 16
%y = getelementptr inbounds %struct.vec, %struct.vec* %c, i64 0, i32 0
%add.ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %y, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
ret void
}
@@ -2062,7 +2062,7 @@ entry:
%c = alloca %struct.vec, align 16
%y = getelementptr inbounds %struct.vec, %struct.vec* %c, i64 0, i32 0
%add.ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %y, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
ret void
}
@@ -2090,7 +2090,7 @@ entry:
%c = alloca %struct.vec, align 16
%y = getelementptr inbounds %struct.vec, %struct.vec* %c, i64 0, i32 0
%add.ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %y, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
ret void
}
@@ -3152,7 +3152,7 @@ entry:
%b = getelementptr inbounds %struct.nest, %struct.nest* %c, i32 0, i32 1
%_a = getelementptr inbounds %struct.pair, %struct.pair* %b, i32 0, i32 0
%0 = load i32, i32* %_a, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %0)
ret void
}
@@ -3181,7 +3181,7 @@ bb:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%tmp = alloca %struct.small*, align 8
- %tmp1 = call i32 (...)* @dummy(%struct.small** %tmp)
+ %tmp1 = call i32 (...) @dummy(%struct.small** %tmp)
%tmp2 = load %struct.small*, %struct.small** %tmp, align 8
%tmp3 = ptrtoint %struct.small* %tmp2 to i64
%tmp4 = trunc i64 %tmp3 to i32
@@ -3209,7 +3209,7 @@ bb17: ; preds = %bb6
bb21: ; preds = %bb6, %bb
%tmp22 = phi i32 [ %tmp1, %bb ], [ %tmp14, %bb6 ]
- %tmp23 = call i32 (...)* @dummy(i32 %tmp22)
+ %tmp23 = call i32 (...) @dummy(i32 %tmp22)
ret i32 undef
}
@@ -3235,7 +3235,7 @@ entry:
; DARWIN-X64: .cfi_endproc
%test = alloca [32 x i8], align 16
%arraydecay = getelementptr inbounds [32 x i8], [32 x i8]* %test, i32 0, i32 0
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
ret i32 %call
}
@@ -3261,7 +3261,7 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%test = alloca [33 x i8], align 16
%arraydecay = getelementptr inbounds [33 x i8], [33 x i8]* %test, i32 0, i32 0
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
ret i32 %call
}
@@ -3287,7 +3287,7 @@ entry:
; DARWIN-X64: .cfi_endproc
%test = alloca [4 x i8], align 1
%arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %test, i32 0, i32 0
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
ret i32 %call
}
@@ -3313,7 +3313,7 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%test = alloca [5 x i8], align 1
%arraydecay = getelementptr inbounds [5 x i8], [5 x i8]* %test, i32 0, i32 0
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
ret i32 %call
}
@@ -3347,7 +3347,7 @@ entry:
%3 = load i64, i64* %2, align 1
%4 = getelementptr { i64, i8 }, { i64, i8 }* %test.coerce, i32 0, i32 1
%5 = load i8, i8* %4, align 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %3, i8 %5)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %3, i8 %5)
ret i32 %call
}
@@ -3381,7 +3381,7 @@ entry:
%3 = load i64, i64* %2, align 1
%4 = getelementptr { i64, i8 }, { i64, i8 }* %test.coerce, i32 0, i32 1
%5 = load i8, i8* %4, align 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %3, i8 %5)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %3, i8 %5)
ret i32 %call
}
@@ -3410,7 +3410,7 @@ entry:
%0 = alloca i8, i64 4
store i8* %0, i8** %test, align 8
%1 = load i8*, i8** %test, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %1)
ret i32 %call
}
@@ -3438,7 +3438,7 @@ entry:
%0 = alloca i8, i64 5
store i8* %0, i8** %test, align 8
%1 = load i8*, i8** %test, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %1)
ret i32 %call
}
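(Note on the stack-protector.ll hunks above: every changed printf call reflects the same mechanical IR syntax update carried by this commit — the callee of a call instruction is now written with its function type rather than a pointer-to-function type. A minimal before/after sketch of the pattern, with a hypothetical caller name:

declare i32 @printf(i8*, ...)

define void @call_syntax_demo(i8* %s) {
entry:
  ; Old form, as it appeared before this update:
  ;   %r = call i32 (i8*, ...)* @printf(i8* %s)
  ; New form: only the function type is spelled out; the pointer level is implied.
  %r = call i32 (i8*, ...) @printf(i8* %s)
  ret void
}

The stackmap, patchpoint, and statepoint test updates that follow apply the same rewrite to the variadic intrinsic callees.)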
diff --git a/test/CodeGen/X86/stackmap-fast-isel.ll b/test/CodeGen/X86/stackmap-fast-isel.ll
index d2155bd..1392e5b 100644
--- a/test/CodeGen/X86/stackmap-fast-isel.ll
+++ b/test/CodeGen/X86/stackmap-fast-isel.ll
@@ -99,7 +99,7 @@
define void @constantargs() {
entry:
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 15, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 1, i32 15, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
ret void
}
@@ -116,7 +116,7 @@ entry:
; CHECK-NEXT: .long 33
define void @liveConstant() {
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 5, i32 33)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 5, i32 33)
ret void
}
@@ -139,7 +139,7 @@ entry:
store i64 11, i64* %metadata1
store i64 12, i64* %metadata1
store i64 13, i64* %metadata1
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
ret void
}
@@ -155,10 +155,10 @@ entry:
; CHECK-LABEL: .long L{{.*}}-_longid
define void @longid() {
entry:
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4294967295, i32 0)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4294967296, i32 0)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 9223372036854775807, i32 0)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 -1, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4294967295, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4294967296, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 9223372036854775807, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 -1, i32 0)
ret void
}
diff --git a/test/CodeGen/X86/stackmap-large-constants.ll b/test/CodeGen/X86/stackmap-large-constants.ll
index 73ee4f3..a38b920 100644
--- a/test/CodeGen/X86/stackmap-large-constants.ll
+++ b/test/CodeGen/X86/stackmap-large-constants.ll
@@ -51,7 +51,7 @@
declare void @llvm.experimental.stackmap(i64, i32, ...)
define void @foo() {
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0, i64 9223372036854775807)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0, i64 9223372036854775807)
ret void
}
@@ -78,6 +78,6 @@ define void @foo() {
define void @bar() {
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0, i64 -9223372036854775808)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0, i64 -9223372036854775808)
ret void
}
diff --git a/test/CodeGen/X86/stackmap-liveness.ll b/test/CodeGen/X86/stackmap-liveness.ll
index 31553c0..599b626 100644
--- a/test/CodeGen/X86/stackmap-liveness.ll
+++ b/test/CodeGen/X86/stackmap-liveness.ll
@@ -50,7 +50,7 @@ entry:
; PATCH-NEXT: .byte 16
; Align
; PATCH-NEXT: .align 3
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 12, i8* null, i32 0)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 12, i8* null, i32 0)
%a2 = call i64 asm sideeffect "", "={r8}"() nounwind
%a3 = call i8 asm sideeffect "", "={ah}"() nounwind
%a4 = call <4 x double> asm sideeffect "", "={ymm0}"() nounwind
@@ -97,7 +97,7 @@ entry:
; PATCH-NEXT: .byte 16
; Align
; PATCH-NEXT: .align 3
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 2, i32 12, i8* null, i32 0)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 2, i32 12, i8* null, i32 0)
call void asm sideeffect "", "{r8},{ah},{ymm0},{ymm1}"(i64 %a2, i8 %a3, <4 x double> %a4, <4 x double> %a5) nounwind
; StackMap 3 (no liveness information available)
@@ -129,7 +129,7 @@ entry:
; PATCH-NEXT: .byte 16
; Align
; PATCH-NEXT: .align 3
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 12, i8* null, i32 0)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 12, i8* null, i32 0)
call void asm sideeffect "", "{xmm2}"(<2 x double> %a1) nounwind
ret void
}
@@ -166,8 +166,8 @@ entry:
; PATCH-NEXT: .byte 16
; Align
; PATCH-NEXT: .align 3
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 5)
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 5, i32 0, i8* null, i32 0)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 5)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 5, i32 0, i8* null, i32 0)
call void asm sideeffect "", "{xmm2}"(<2 x double> %a1) nounwind
ret void
}
diff --git a/test/CodeGen/X86/stackmap-nops.ll b/test/CodeGen/X86/stackmap-nops.ll
index 7932c0d..08fee2e 100644
--- a/test/CodeGen/X86/stackmap-nops.ll
+++ b/test/CodeGen/X86/stackmap-nops.ll
@@ -193,41 +193,41 @@ entry:
; CHECK-NEXT: .byte 102
; CHECK-NEXT: .byte 102
; CHECK-NEXT: nopw %cs:512(%rax,%rax)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 1)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 2, i32 2)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 3)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 4)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 5, i32 5)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 6, i32 6)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 7, i32 7)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 8, i32 8)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 9, i32 9)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 10, i32 10)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 11, i32 11)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 12)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 13, i32 13)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 14)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 15)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 16)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 17)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 18, i32 18)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 19, i32 19)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 20, i32 20)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 21, i32 21)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 22, i32 22)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 23, i32 23)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 24, i32 24)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 25, i32 25)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 26, i32 26)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 27, i32 27)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 28, i32 28)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 29, i32 29)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 30, i32 30)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 1, i32 1)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 2, i32 2)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 3)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 4)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 5, i32 5)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 6, i32 6)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 7, i32 7)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 8, i32 8)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 9, i32 9)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 10, i32 10)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 11, i32 11)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 12)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 13, i32 13)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 14)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 15)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 16)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 17, i32 17)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 18, i32 18)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 19, i32 19)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 20, i32 20)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 21, i32 21)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 22, i32 22)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 23, i32 23)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 24, i32 24)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 25, i32 25)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 26, i32 26)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 27, i32 27)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 28, i32 28)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 29, i32 29)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 30, i32 30)
; Add an extra stackmap with a zero-length shadow to thwart the shadow
; optimization. This will force all 15 bytes of the previous shadow to be
; padded with nops.
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 31, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 31, i32 0)
ret void
}
diff --git a/test/CodeGen/X86/stackmap-shadow-optimization.ll b/test/CodeGen/X86/stackmap-shadow-optimization.ll
index a3725f2..001d8d9 100644
--- a/test/CodeGen/X86/stackmap-shadow-optimization.ll
+++ b/test/CodeGen/X86/stackmap-shadow-optimization.ll
@@ -18,7 +18,7 @@ entry:
; CHECK: callq _bar
; CHECK-NOT: nop
call void @bar()
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 8)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 8)
call void @bar()
call void @bar()
ret void
diff --git a/test/CodeGen/X86/stackmap.ll b/test/CodeGen/X86/stackmap.ll
index fc958ec..0805e81 100644
--- a/test/CodeGen/X86/stackmap.ll
+++ b/test/CodeGen/X86/stackmap.ll
@@ -125,7 +125,7 @@
define void @constantargs() {
entry:
%0 = inttoptr i64 12345 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 15, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 15, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
ret void
}
@@ -147,7 +147,7 @@ entry:
; Runtime void->void call.
call void inttoptr (i64 -559038737 to void ()*)()
; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
ret void
}
@@ -173,7 +173,7 @@ entry:
cold:
; OSR patchpoint with 12-byte nop-slide and 2 live vars.
%thunk = inttoptr i64 -559038737 to i8*
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4, i32 15, i8* %thunk, i32 0, i64 %a, i64 %b)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4, i32 15, i8* %thunk, i32 0, i64 %a, i64 %b)
unreachable
ret:
ret void
@@ -194,7 +194,7 @@ ret:
define i64 @propertyRead(i64* %obj) {
entry:
%resolveRead = inttoptr i64 -559038737 to i8*
- %result = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %resolveRead, i32 1, i64* %obj)
+ %result = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %resolveRead, i32 1, i64* %obj)
%add = add i64 %result, 3
ret i64 %add
}
@@ -214,7 +214,7 @@ entry:
define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
entry:
%resolveWrite = inttoptr i64 -559038737 to i8*
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 15, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 15, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
ret void
}
@@ -236,7 +236,7 @@ entry:
define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 -559038737 to i8*
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 7, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 7, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
ret void
}
@@ -258,7 +258,7 @@ entry:
define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 -559038737 to i8*
- %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 8, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+ %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 8, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
%add = add i64 %result, 3
ret i64 %add
}
@@ -278,7 +278,7 @@ entry:
; CHECK-NEXT: .short 6
define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
entry:
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 11, i32 15, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 15, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
ret void
}
@@ -297,7 +297,7 @@ entry:
; CHECK-NEXT: .short 6
define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
entry:
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 15, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 15, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
ret void
}
@@ -333,7 +333,7 @@ bb17:
bb60:
tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 13, i32 5, i32 %tmp32)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 13, i32 5, i32 %tmp32)
unreachable
bb61:
@@ -367,7 +367,7 @@ define void @subRegOffset(i16 %arg) {
%arghi = lshr i16 %v, 8
%a1 = trunc i16 %arghi to i8
tail call void asm sideeffect "nop", "~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 5, i8 %a0, i8 %a1)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 5, i8 %a0, i8 %a1)
ret void
}
@@ -384,7 +384,7 @@ define void @subRegOffset(i16 %arg) {
; CHECK-NEXT: .long 33
define void @liveConstant() {
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 5, i32 33)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 5, i32 33)
ret void
}
@@ -422,10 +422,10 @@ entry:
store i64 11, i64* %metadata1
store i64 12, i64* %metadata1
store i64 13, i64* %metadata1
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
%metadata2 = alloca i8, i32 4, align 8
%metadata3 = alloca i16, i32 4, align 8
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 17, i32 5, i8* null, i32 0, i8* %metadata2, i16* %metadata3)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 17, i32 5, i8* null, i32 0, i8* %metadata2, i16* %metadata3)
ret void
}
@@ -441,10 +441,10 @@ entry:
; CHECK-LABEL: .long L{{.*}}-_longid
define void @longid() {
entry:
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4294967295, i32 0, i8* null, i32 0)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4294967296, i32 0, i8* null, i32 0)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 9223372036854775807, i32 0, i8* null, i32 0)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 -1, i32 0, i8* null, i32 0)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4294967295, i32 0, i8* null, i32 0)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4294967296, i32 0, i8* null, i32 0)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 9223372036854775807, i32 0, i8* null, i32 0)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 -1, i32 0, i8* null, i32 0)
ret void
}
@@ -462,7 +462,7 @@ entry:
; CHECK-NEXT: .long -{{[0-9]+}}
define void @clobberScratch(i32 %a) {
tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r12},~{r13},~{r14},~{r15}"() nounwind
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
ret void
}
@@ -474,11 +474,11 @@ define void @clobberScratch(i32 %a) {
; CHECK-NEXT: .short 0
define void @needsStackRealignment() {
%val = alloca i64, i32 3, align 128
- tail call void (...)* @escape_values(i64* %val)
+ tail call void (...) @escape_values(i64* %val)
; Note: Adding any non-constant to the stackmap would fail because we
; expected to be able to address off the frame pointer. In a realigned
; frame, we must use the stack pointer instead. This is a separate bug.
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0)
ret void
}
declare void @escape_values(...)
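A minimal sketch, not part of this patch, of the call-syntax update the hunks above apply: the callee type of a varargs (or overloaded-intrinsic) call is now spelled as a plain function type rather than a pointer-to-function type. The @printf declaration below is assumed only for illustration.

declare i32 @printf(i8*, ...)

define void @call_syntax_example(i8* %fmt, i32 %x) {
entry:
  ; previously written as: call i32 (i8*, ...)* @printf(i8* %fmt, i32 %x)
  %r = call i32 (i8*, ...) @printf(i8* %fmt, i32 %x)
  ret void
}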
diff --git a/test/CodeGen/X86/statepoint-allocas.ll b/test/CodeGen/X86/statepoint-allocas.ll
new file mode 100644
index 0000000..eb34a42
--- /dev/null
+++ b/test/CodeGen/X86/statepoint-allocas.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s | FileCheck %s
+; Check that we can lower a use of an alloca both as a deopt value (where the
+; exact meaning is up to the consumer of the stackmap) and as an explicit spill
+; slot used for GC.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare zeroext i1 @return_i1()
+
+; Can we handle an explicit relocation slot (in the form of an alloca) given
+; to the statepoint?
+define i32 addrspace(1)* @test(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test
+; CHECK: pushq %rax
+; CHECK: movq %rdi, (%rsp)
+; CHECK: callq return_i1
+; CHECK: movq (%rsp), %rax
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+ %alloca = alloca i32 addrspace(1)*, align 8
+ store i32 addrspace(1)* %ptr, i32 addrspace(1)** %alloca
+ call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 addrspace(1)** %alloca)
+ %rel = load i32 addrspace(1)*, i32 addrspace(1)** %alloca
+ ret i32 addrspace(1)* %rel
+}
+
+; Can we handle an alloca as a deopt value?
+define i32 addrspace(1)* @test2(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test2
+; CHECK: pushq %rax
+; CHECK: movq %rdi, (%rsp)
+; CHECK: callq return_i1
+; CHECK: xorl %eax, %eax
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+ %alloca = alloca i32 addrspace(1)*, align 8
+ store i32 addrspace(1)* %ptr, i32 addrspace(1)** %alloca
+ call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 1, i32 addrspace(1)** %alloca)
+ ret i32 addrspace(1)* null
+}
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
+
+
+; CHECK-LABEL: .section .llvm_stackmaps
+; CHECK-NEXT: __LLVM_StackMaps:
+; Header
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 0
+; Num Functions
+; CHECK-NEXT: .long 2
+; Num LargeConstants
+; CHECK-NEXT: .long 0
+; Num Callsites
+; CHECK-NEXT: .long 2
+
+; Functions and stack size
+; CHECK-NEXT: .quad test
+; CHECK-NEXT: .quad 8
+; CHECK-NEXT: .quad test2
+; CHECK-NEXT: .quad 8
+
+; Large Constants
+; Statepoint ID only
+; CHECK: .quad 2882400000
+
+; Callsites
+; The GC one
+; CHECK: .long .Ltmp1-test
+; CHECK: .short 0
+; CHECK: .short 3
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+; Direct Spill Slot [RSP+0]
+; CHECK: .byte 2
+; CHECK: .byte 8
+; CHECK: .short 7
+; CHECK: .long 0
+; No Padding or LiveOuts
+; CHECK: .short 0
+; CHECK: .short 0
+; CHECK: .align 8
+
+; The Deopt one
+; CHECK: .long .Ltmp3-test2
+; CHECK: .short 0
+; CHECK: .short 3
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+; SmallConstant (1)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 1
+; Direct Spill Slot [RSP+0]
+; CHECK: .byte 2
+; CHECK: .byte 8
+; CHECK: .short 7
+; CHECK: .long 0
+
+; No Padding or LiveOuts
+; CHECK: .short 0
+; CHECK: .short 0
+; CHECK: .align 8
+
+
diff --git a/test/CodeGen/X86/statepoint-call-lowering.ll b/test/CodeGen/X86/statepoint-call-lowering.ll
index 22049cf..9f6e4f9 100644
--- a/test/CodeGen/X86/statepoint-call-lowering.ll
+++ b/test/CodeGen/X86/statepoint-call-lowering.ll
@@ -20,7 +20,7 @@ define i1 @test_i1_return() gc "statepoint-example" {
; CHECK: popq %rdx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0)
+ %safepoint_token = tail call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0)
%call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
ret i1 %call1
}
@@ -32,7 +32,7 @@ define i32 @test_i32_return() gc "statepoint-example" {
; CHECK: popq %rdx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i32 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i32f(i32 ()* @return_i32, i32 0, i32 0, i32 0)
+ %safepoint_token = tail call i32 (i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i32 ()* @return_i32, i32 0, i32 0, i32 0)
%call1 = call zeroext i32 @llvm.experimental.gc.result.i32(i32 %safepoint_token)
ret i32 %call1
}
@@ -44,7 +44,7 @@ define i32* @test_i32ptr_return() gc "statepoint-example" {
; CHECK: popq %rdx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i32* ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_p0i32f(i32* ()* @return_i32ptr, i32 0, i32 0, i32 0)
+ %safepoint_token = tail call i32 (i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i32* ()* @return_i32ptr, i32 0, i32 0, i32 0)
%call1 = call i32* @llvm.experimental.gc.result.p0i32(i32 %safepoint_token)
ret i32* %call1
}
@@ -56,7 +56,7 @@ define float @test_float_return() gc "statepoint-example" {
; CHECK: popq %rax
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (float ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_f32f(float ()* @return_float, i32 0, i32 0, i32 0)
+ %safepoint_token = tail call i32 (float ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32f(float ()* @return_float, i32 0, i32 0, i32 0)
%call1 = call float @llvm.experimental.gc.result.f32(i32 %safepoint_token)
ret float %call1
}
@@ -70,7 +70,7 @@ define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
; CHECK-NEXT: popq %rdx
; CHECK-NEXT: retq
entry:
- %safepoint_token = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a)
+ %safepoint_token = tail call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a)
%call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
%call2 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
ret i1 %call2
@@ -81,7 +81,7 @@ define void @test_void_vararg() gc "statepoint-example" {
; Check a statepoint wrapping a *void* returning vararg function works
; CHECK: callq varargf
entry:
- %safepoint_token = tail call i32 (void (i32, ...)*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(void (i32, ...)* @varargf, i32 2, i32 0, i32 42, i32 43, i32 0)
+ %safepoint_token = tail call i32 (void (i32, ...)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(void (i32, ...)* @varargf, i32 2, i32 0, i32 42, i32 43, i32 0)
;; if we try to use the result from a statepoint wrapping a
;; non-void-returning varargf, we will experience a crash.
ret void
diff --git a/test/CodeGen/X86/statepoint-forward.ll b/test/CodeGen/X86/statepoint-forward.ll
index 5a1b18a..0b296cf 100644
--- a/test/CodeGen/X86/statepoint-forward.ll
+++ b/test/CodeGen/X86/statepoint-forward.ll
@@ -25,7 +25,7 @@ entry:
%before = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %p
%cmp1 = call i1 @f(i32 addrspace(1)* %before)
call void @llvm.assume(i1 %cmp1)
- %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
+ %safepoint_token = tail call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
%pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32 %safepoint_token, i32 4, i32 4)
%after = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %pnew
%cmp2 = call i1 @f(i32 addrspace(1)* %after)
@@ -44,7 +44,7 @@ entry:
%cmp1 = call i1 @f(i32 addrspace(1)* %v)
call void @llvm.assume(i1 %cmp1)
store i32 addrspace(1)* %v, i32 addrspace(1)* addrspace(1)* %p
- %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
+ %safepoint_token = tail call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
%pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32 %safepoint_token, i32 4, i32 4)
%after = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %pnew
%cmp2 = call i1 @f(i32 addrspace(1)* %after)
@@ -72,7 +72,7 @@ entry:
%before = load i32 addrspace(1)*, i32 addrspace(1)** %p
%cmp1 = call i1 @f(i32 addrspace(1)* %before)
call void @llvm.assume(i1 %cmp1)
- call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0)
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0)
%after = load i32 addrspace(1)*, i32 addrspace(1)** %p
%cmp2 = call i1 @f(i32 addrspace(1)* %after)
ret i1 %cmp2
@@ -90,7 +90,7 @@ entry:
%cmp1 = call i1 @f(i32 addrspace(1)* %v)
call void @llvm.assume(i1 %cmp1)
store i32 addrspace(1)* %v, i32 addrspace(1)** %p
- call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0)
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0)
%after = load i32 addrspace(1)*, i32 addrspace(1)** %p
%cmp2 = call i1 @f(i32 addrspace(1)* %after)
ret i1 %cmp2
diff --git a/test/CodeGen/X86/statepoint-invoke.ll b/test/CodeGen/X86/statepoint-invoke.ll
index 91bf46a..177eb96 100644
--- a/test/CodeGen/X86/statepoint-invoke.ll
+++ b/test/CodeGen/X86/statepoint-invoke.ll
@@ -6,7 +6,9 @@ declare i64 addrspace(1)* @"some_other_call"(i64 addrspace(1)*)
declare i32 @"personality_function"()
-define i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) {
+define i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj,
+ i64 addrspace(1)* %obj1)
+ gc "statepoint-example" {
entry:
; CHECK: .Ltmp{{[0-9]+}}:
; CHECK: callq some_other_call
diff --git a/test/CodeGen/X86/statepoint-stack-usage.ll b/test/CodeGen/X86/statepoint-stack-usage.ll
index 3ecef33..a968c03 100644
--- a/test/CodeGen/X86/statepoint-stack-usage.ll
+++ b/test/CodeGen/X86/statepoint-stack-usage.ll
@@ -8,20 +8,20 @@ target triple = "x86_64-pc-linux-gnu"
; of GC arguments differ, naive lowering code would insert loads and
; stores to rearrange items on the stack. We need to make sure (for
; performance) that this doesn't happen.
-define i32 @back_to_back_calls(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 {
+define i32 @back_to_back_calls(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" {
; CHECK-LABEL: back_to_back_calls
; The exact stores don't matter, but there need to be three stack slots created
; CHECK: movq %rdx, 16(%rsp)
; CHECK: movq %rdi, 8(%rsp)
; CHECK: movq %rsi, (%rsp)
- %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
+ %safepoint_token = tail call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
%a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 9)
%b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 10)
%c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 11)
; CHECK: callq
; This is the key check. There should NOT be any memory moves here
; CHECK-NOT: movq
- %safepoint_token2 = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
+ %safepoint_token2 = tail call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
%a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 9, i32 11)
%b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 9, i32 10)
%c2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 9, i32 9)
@@ -31,20 +31,20 @@ define i32 @back_to_back_calls(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 a
; This test simply checks that minor changes in vm state don't prevent slots
; being reused for gc values.
-define i32 @reserve_first(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 {
+define i32 @reserve_first(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" {
; CHECK-LABEL: reserve_first
; The exact stores don't matter, but there need to be three stack slots created
; CHECK: movq %rdx, 16(%rsp)
; CHECK: movq %rdi, 8(%rsp)
; CHECK: movq %rsi, (%rsp)
- %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
+ %safepoint_token = tail call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
%a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 9)
%b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 10)
%c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 11)
; CHECK: callq
; This is the key check. There should NOT be any memory moves here
; CHECK-NOT: movq
- %safepoint_token2 = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 addrspace(1)* %a1, i32 0, i32 addrspace(1)* %c1, i32 0, i32 0, i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
+ %safepoint_token2 = tail call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 addrspace(1)* %a1, i32 0, i32 addrspace(1)* %c1, i32 0, i32 0, i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
%a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 9, i32 11)
%b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 9, i32 10)
%c2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 9, i32 9)
diff --git a/test/CodeGen/X86/statepoint-stackmap-format.ll b/test/CodeGen/X86/statepoint-stackmap-format.ll
index e452a63..9593c40 100644
--- a/test/CodeGen/X86/statepoint-stackmap-format.ll
+++ b/test/CodeGen/X86/statepoint-stackmap-format.ll
@@ -21,7 +21,7 @@ define i1 @test(i32 addrspace(1)* %ptr) gc "statepoint-example" {
entry:
%metadata1 = alloca i32 addrspace(1)*, i32 2, align 8
store i32 addrspace(1)* null, i32 addrspace(1)** %metadata1
- %safepoint_token = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr, i32 addrspace(1)* null, i32 addrspace(1)* %ptr, i32 addrspace(1)* null)
+ %safepoint_token = tail call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr, i32 addrspace(1)* null, i32 addrspace(1)* %ptr, i32 addrspace(1)* null)
%call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
%a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 6, i32 6)
%b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index 34f4066..fa00d6f 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -11,11 +11,11 @@ entry:
br i1 %obit, label %overflow, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
; CHECK-LABEL: func1:
@@ -31,11 +31,11 @@ entry:
br i1 %obit, label %carry, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
carry:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
; CHECK-LABEL: func2:
diff --git a/test/CodeGen/X86/switch-crit-edge-constant.ll b/test/CodeGen/X86/switch-crit-edge-constant.ll
index b44385c..e9a208d 100644
--- a/test/CodeGen/X86/switch-crit-edge-constant.ll
+++ b/test/CodeGen/X86/switch-crit-edge-constant.ll
@@ -35,19 +35,19 @@ cond_true: ; preds = %bb2
blahaha: ; preds = %cond_true, %bb2, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
%s.0 = phi i8* [ getelementptr ([8 x i8], [8 x i8]* @str, i32 0, i64 0), %cond_true ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str2, i32 0, i64 0), %bb2 ] ; <i8*> [#uses=13]
- %tmp8 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp10 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp12 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp14 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp16 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp18 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp20 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp22 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp24 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp26 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp28 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp30 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp32 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp8 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp10 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp12 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp14 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp16 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp18 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp20 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp22 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp24 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp26 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp28 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp30 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp32 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/switch-or.ll b/test/CodeGen/X86/switch-or.ll
index 75832c7..6e6b013 100644
--- a/test/CodeGen/X86/switch-or.ll
+++ b/test/CodeGen/X86/switch-or.ll
@@ -12,7 +12,7 @@ entry:
]
if.then:
- %call = tail call i32 (...)* @bar() nounwind
+ %call = tail call i32 (...) @bar() nounwind
ret void
if.end:
diff --git a/test/CodeGen/X86/tail-call-win64.ll b/test/CodeGen/X86/tail-call-win64.ll
index 8811b75..fb10d5d 100644
--- a/test/CodeGen/X86/tail-call-win64.ll
+++ b/test/CodeGen/X86/tail-call-win64.ll
@@ -4,7 +4,7 @@
; in-function jumps from function exiting jumps.
define void @tail_jmp_reg(i32, i32, void ()* %fptr) {
- tail call void ()* %fptr()
+ tail call void () %fptr()
ret void
}
@@ -28,7 +28,7 @@ define void @tail_jmp_imm() {
define void @tail_jmp_mem() {
%fptr = load void ()*, void ()** @g_fptr
- tail call void ()* %fptr()
+ tail call void () %fptr()
ret void
}
diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll
index f4d51c2..9e054fe 100644
--- a/test/CodeGen/X86/tailcall-64.ll
+++ b/test/CodeGen/X86/tailcall-64.ll
@@ -215,7 +215,7 @@ entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds [0 x i32 (i8*, ...)*], [0 x i32 (i8*, ...)*]* @funcs, i64 0, i64 %idxprom
%0 = load i32 (i8*, ...)*, i32 (i8*, ...)** %arrayidx, align 8
- %call = tail call i32 (i8*, ...)* %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind
+ %call = tail call i32 (i8*, ...) %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind
ret i32 %call
}
diff --git a/test/CodeGen/X86/tailcall-fastisel.ll b/test/CodeGen/X86/tailcall-fastisel.ll
index 4e1fc43..f69e75c 100644
--- a/test/CodeGen/X86/tailcall-fastisel.ll
+++ b/test/CodeGen/X86/tailcall-fastisel.ll
@@ -11,7 +11,7 @@ fail: ; preds = %entry
define i32 @foo() nounwind {
entry:
- %0 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @bar() nounwind ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/X86/tailcall-mem-intrinsics.ll b/test/CodeGen/X86/tailcall-mem-intrinsics.ll
new file mode 100644
index 0000000..0e0ab5c
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-mem-intrinsics.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+; CHECK-LABEL: tail_memcpy
+; CHECK: jmp memcpy
+define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memmove
+; CHECK: jmp memmove
+define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memset
+; CHECK: jmp memset
+define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll
index d3498a4..38685ec 100644
--- a/test/CodeGen/X86/twoaddr-coalesce.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce.ll
@@ -12,7 +12,7 @@ bb1: ; preds = %bb1, %bb1.thread
%0 = trunc i32 %i.0.reg2mem.0 to i8 ; <i8> [#uses=1]
%1 = sdiv i8 %0, 2 ; <i8> [#uses=1]
%2 = sext i8 %1 to i32 ; <i32> [#uses=1]
- %3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+ %3 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
%indvar.next = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, 258 ; <i1> [#uses=1]
br i1 %exitcond, label %bb2, label %bb1
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
index ca764e7..a1074a6 100644
--- a/test/CodeGen/X86/uint64-to-float.ll
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -6,13 +6,13 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
-; CHECK: testq %rdi, %rdi
+; CHECK: andl
+; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: js LBB0_1
; CHECK: cvtsi2ss
; CHECK-NEXT: ret
; CHECK: LBB0_1
; CHECK: shrq
-; CHECK-NEXT: andq
; CHECK-NEXT: orq
; CHECK-NEXT: cvtsi2ss
define float @test(i64 %a) {
diff --git a/test/CodeGen/X86/umul-with-carry.ll b/test/CodeGen/X86/umul-with-carry.ll
index c930c16..6435760 100644
--- a/test/CodeGen/X86/umul-with-carry.ll
+++ b/test/CodeGen/X86/umul-with-carry.ll
@@ -14,11 +14,11 @@ entry:
br i1 %obit, label %carry, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
carry:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
}
diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll
index 47d0811..2ed1acf 100644
--- a/test/CodeGen/X86/unknown-location.ll
+++ b/test/CodeGen/X86/unknown-location.ll
@@ -32,5 +32,5 @@ entry:
!8 = !MDLocation(line: 4, column: 3, scope: !7)
!9 = !{!1}
!10 = !MDFile(filename: "test.c", directory: "/dir")
-!11 = !{i32 0}
+!11 = !{}
!12 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/utf16-cfstrings.ll b/test/CodeGen/X86/utf16-cfstrings.ll
index b49eecf..5f0e78f 100644
--- a/test/CodeGen/X86/utf16-cfstrings.ll
+++ b/test/CodeGen/X86/utf16-cfstrings.ll
@@ -21,7 +21,7 @@ define i32 @main() uwtable ssp {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- call void (%0*, ...)* @NSLog(%0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to %0*))
+ call void (%0*, ...) @NSLog(%0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to %0*))
ret i32 0
}
diff --git a/test/CodeGen/X86/vararg-callee-cleanup.ll b/test/CodeGen/X86/vararg-callee-cleanup.ll
index 2dcf319..bb1104d 100644
--- a/test/CodeGen/X86/vararg-callee-cleanup.ll
+++ b/test/CodeGen/X86/vararg-callee-cleanup.ll
@@ -4,8 +4,8 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
declare x86_thiscallcc void @thiscall_thunk(i8* %this, ...)
define i32 @call_varargs_thiscall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
- call x86_thiscallcc void (i8*, ...)* @thiscall_thunk(i8* %a, i32 1, i32 2)
- call x86_thiscallcc void (i8*, ...)* @thiscall_thunk(i8* %a, i32 1, i32 2)
+ call x86_thiscallcc void (i8*, ...) @thiscall_thunk(i8* %a, i32 1, i32 2)
+ call x86_thiscallcc void (i8*, ...) @thiscall_thunk(i8* %a, i32 1, i32 2)
%t1 = add i32 %b, %c
%r = add i32 %t1, %d
ret i32 %r
@@ -19,8 +19,8 @@ define i32 @call_varargs_thiscall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
declare x86_stdcallcc void @stdcall_thunk(i8* %this, ...)
define i32 @call_varargs_stdcall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
- call x86_stdcallcc void (i8*, ...)* @stdcall_thunk(i8* %a, i32 1, i32 2)
- call x86_stdcallcc void (i8*, ...)* @stdcall_thunk(i8* %a, i32 1, i32 2)
+ call x86_stdcallcc void (i8*, ...) @stdcall_thunk(i8* %a, i32 1, i32 2)
+ call x86_stdcallcc void (i8*, ...) @stdcall_thunk(i8* %a, i32 1, i32 2)
%t1 = add i32 %b, %c
%r = add i32 %t1, %d
ret i32 %r
@@ -32,8 +32,8 @@ define i32 @call_varargs_stdcall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
declare x86_fastcallcc void @fastcall_thunk(i8* %this, ...)
define i32 @call_varargs_fastcall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
- call x86_fastcallcc void (i8*, ...)* @fastcall_thunk(i8* inreg %a, i32 inreg 1, i32 2)
- call x86_fastcallcc void (i8*, ...)* @fastcall_thunk(i8* inreg %a, i32 inreg 1, i32 2)
+ call x86_fastcallcc void (i8*, ...) @fastcall_thunk(i8* inreg %a, i32 inreg 1, i32 2)
+ call x86_fastcallcc void (i8*, ...) @fastcall_thunk(i8* inreg %a, i32 inreg 1, i32 2)
%t1 = add i32 %b, %c
%r = add i32 %t1, %d
ret i32 %r
diff --git a/test/CodeGen/X86/vararg_tailcall.ll b/test/CodeGen/X86/vararg_tailcall.ll
index 9b76bdd..98aa4a8 100644
--- a/test/CodeGen/X86/vararg_tailcall.ll
+++ b/test/CodeGen/X86/vararg_tailcall.ll
@@ -15,7 +15,7 @@
; WIN64: callq
define void @foo(i64 %arg) nounwind optsize ssp noredzone {
entry:
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone
ret void
}
@@ -40,7 +40,7 @@ declare void @bar2(i8*, i64) optsize noredzone
define i8* @foo2(i8* %arg) nounwind optsize ssp noredzone {
entry:
%tmp1 = load i8*, i8** @sel, align 8
- %call = tail call i8* (i8*, i8*, ...)* @x2(i8* %arg, i8* %tmp1) nounwind optsize noredzone
+ %call = tail call i8* (i8*, i8*, ...) @x2(i8* %arg, i8* %tmp1) nounwind optsize noredzone
ret i8* %call
}
@@ -56,7 +56,7 @@ entry:
%tmp3 = load i8*, i8** @sel4, align 8
%tmp4 = load i8*, i8** @sel5, align 8
%tmp5 = load i8*, i8** @sel6, align 8
- %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5) nounwind optsize noredzone
+ %call = tail call i8* (i8*, i8*, i8*, ...) @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5) nounwind optsize noredzone
ret i8* %call
}
@@ -73,7 +73,7 @@ entry:
%tmp4 = load i8*, i8** @sel5, align 8
%tmp5 = load i8*, i8** @sel6, align 8
%tmp6 = load i8*, i8** @sel7, align 8
- %call = tail call i8* (i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...)* @x7(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i8* %tmp6) nounwind optsize noredzone
+ %call = tail call i8* (i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...) @x7(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i8* %tmp6) nounwind optsize noredzone
ret i8* %call
}
@@ -89,6 +89,6 @@ entry:
%tmp3 = load i8*, i8** @sel4, align 8
%tmp4 = load i8*, i8** @sel5, align 8
%tmp5 = load i8*, i8** @sel6, align 8
- %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i32 48879, i32 48879) nounwind optsize noredzone
+ %call = tail call i8* (i8*, i8*, i8*, ...) @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i32 48879, i32 48879) nounwind optsize noredzone
ret i8* %call
}
diff --git a/test/CodeGen/X86/variadic-node-pic.ll b/test/CodeGen/X86/variadic-node-pic.ll
index 1182a30..704459e 100644
--- a/test/CodeGen/X86/variadic-node-pic.ll
+++ b/test/CodeGen/X86/variadic-node-pic.ll
@@ -6,6 +6,6 @@ target triple = "x86_64-apple-darwin8"
declare void @xscanf(i64) nounwind
define void @foo() nounwind {
- call void (i64)* @xscanf( i64 0 ) nounwind
+ call void (i64) @xscanf( i64 0 ) nounwind
unreachable
}
diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll
index 07cd195..e507895 100644
--- a/test/CodeGen/X86/vec_cast2.ll
+++ b/test/CodeGen/X86/vec_cast2.ll
@@ -100,37 +100,29 @@ define <8 x i8> @foo3_8(<8 x float> %src) {
;
; CHECK-WIDE-LABEL: foo3_8:
; CHECK-WIDE: ## BB#0:
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: shll $8, %eax
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx
-; CHECK-WIDE-NEXT: movzbl %cl, %ecx
-; CHECK-WIDE-NEXT: orl %eax, %ecx
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: shll $8, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx
-; CHECK-WIDE-NEXT: movzbl %dl, %edx
-; CHECK-WIDE-NEXT: orl %eax, %edx
-; CHECK-WIDE-NEXT: vpinsrw $0, %edx, %xmm0, %xmm1
-; CHECK-WIDE-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
+; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
+; CHECK-WIDE-NEXT: vmovshdup %xmm0, %xmm2 ## xmm2 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpermilpd $1, %xmm0, %xmm2 ## xmm2 = xmm0[1,0]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpermilps $231, %xmm0, %xmm2 ## xmm2 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
+; CHECK-WIDE-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vmovshdup %xmm0, %xmm2 ## xmm2 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: shll $8, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: movzbl %cl, %ecx
-; CHECK-WIDE-NEXT: orl %eax, %ecx
-; CHECK-WIDE-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpermilpd $1, %xmm0, %xmm2 ## xmm2 = xmm0[1,0]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: shll $8, %eax
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: movzbl %cl, %ecx
-; CHECK-WIDE-NEXT: orl %eax, %ecx
-; CHECK-WIDE-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0
+; CHECK-WIDE-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpermilps $231, %xmm0, %xmm0 ## xmm0 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
+; CHECK-WIDE-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
; CHECK-WIDE-NEXT: vzeroupper
; CHECK-WIDE-NEXT: retl
%res = fptosi <8 x float> %src to <8 x i8>
@@ -145,21 +137,17 @@ define <4 x i8> @foo3_4(<4 x float> %src) {
;
; CHECK-WIDE-LABEL: foo3_4:
; CHECK-WIDE: ## BB#0:
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: shll $8, %eax
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx
-; CHECK-WIDE-NEXT: movzbl %cl, %ecx
-; CHECK-WIDE-NEXT: orl %eax, %ecx
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: shll $8, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx
-; CHECK-WIDE-NEXT: movzbl %dl, %edx
-; CHECK-WIDE-NEXT: orl %eax, %edx
-; CHECK-WIDE-NEXT: vpinsrw $0, %edx, %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
+; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
+; CHECK-WIDE-NEXT: vmovshdup %xmm0, %xmm2 ## xmm2 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpermilpd $1, %xmm0, %xmm2 ## xmm2 = xmm0[1,0]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpermilps $231, %xmm0, %xmm0 ## xmm0 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
+; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm0
; CHECK-WIDE-NEXT: retl
%res = fptosi <4 x float> %src to <4 x i8>
ret <4 x i8> %res
diff --git a/test/CodeGen/X86/vec_floor.ll b/test/CodeGen/X86/vec_floor.ll
index 4db68bd..f35c4ab 100644
--- a/test/CodeGen/X86/vec_floor.ll
+++ b/test/CodeGen/X86/vec_floor.ll
@@ -180,3 +180,49 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %p)
ret <8 x float> %t
}
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+
+;
+; Constant Folding
+;
+
+define <2 x double> @const_floor_v2f64() {
+ ; CHECK: const_floor_v2f64
+ ; CHECK: movaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
+ %t = call <2 x double> @llvm.floor.v2f64(<2 x double> <double -1.5, double 2.5>)
+ ret <2 x double> %t
+}
+
+define <4 x float> @const_floor_v4f32() {
+ ; CHECK: const_floor_v4f32
+ ; CHECK: movaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
+ %t = call <4 x float> @llvm.floor.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
+ ret <4 x float> %t
+}
+
+define <2 x double> @const_ceil_v2f64() {
+ ; CHECK: const_ceil_v2f64
+ ; CHECK: movaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
+ %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> <double -1.5, double 2.5>)
+ ret <2 x double> %t
+}
+
+define <4 x float> @const_ceil_v4f32() {
+ ; CHECK: const_ceil_v4f32
+ ; CHECK: movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
+ %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
+ ret <4 x float> %t
+}
+
+define <2 x double> @const_trunc_v2f64() {
+ ; CHECK: const_trunc_v2f64
+ ; CHECK: movaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
+ %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> <double -1.5, double 2.5>)
+ ret <2 x double> %t
+}
+
+define <4 x float> @const_trunc_v4f32() {
+ ; CHECK: const_trunc_v4f32
+ ; CHECK: movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
+ %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
+ ret <4 x float> %t
+}
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index 0f89515..4018a21 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -8,8 +8,8 @@ define void @t1(i32 %a, x86_mmx* %P) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: shll $12, %ecx
; CHECK-NEXT: movd %ecx, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,1]
-; CHECK-NEXT: movlpd %xmm0, (%eax)
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; CHECK-NEXT: movq %xmm0, (%eax)
; CHECK-NEXT: retl
%tmp12 = shl i32 %a, 12
%tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
diff --git a/test/CodeGen/X86/vec_insert-mmx.ll b/test/CodeGen/X86/vec_insert-mmx.ll
index 447f97a..cbd4208 100644
--- a/test/CodeGen/X86/vec_insert-mmx.ll
+++ b/test/CodeGen/X86/vec_insert-mmx.ll
@@ -6,8 +6,8 @@ define x86_mmx @t0(i32 %A) nounwind {
; X86-32-LABEL: t0:
; X86-32: ## BB#0:
; X86-32: movd {{[0-9]+}}(%esp), %xmm0
-; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,1]
-; X86-32-NEXT: movlpd %xmm0, (%esp)
+; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; X86-32-NEXT: movq %xmm0, (%esp)
; X86-32-NEXT: movq (%esp), %mm0
; X86-32-NEXT: addl $12, %esp
; X86-32-NEXT: retl
diff --git a/test/CodeGen/X86/vec_reassociate.ll b/test/CodeGen/X86/vec_reassociate.ll
new file mode 100644
index 0000000..bf2053f
--- /dev/null
+++ b/test/CodeGen/X86/vec_reassociate.ll
@@ -0,0 +1,119 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s
+
+define <4 x i32> @add_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @add_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: paddd %xmm1, %xmm0
+ ;CHECK-NEXT: retq
+ %1 = add <4 x i32> %a0, <i32 1, i32 -2, i32 3, i32 -4>
+ %2 = add <4 x i32> %a1, <i32 -1, i32 2, i32 -3, i32 4>
+ %3 = add <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @add_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @add_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: paddd %xmm1, %xmm0
+ ;CHECK-NEXT: retq
+ %1 = add <4 x i32> <i32 1, i32 -2, i32 3, i32 -4>, %a0
+ %2 = add <4 x i32> <i32 -1, i32 2, i32 -3, i32 4>, %a1
+ %3 = add <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @mul_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @mul_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: pmulld %xmm1, %xmm0
+ ;CHECK-NEXT: pmulld .LCPI2_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = mul <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 4>
+ %2 = mul <4 x i32> %a1, <i32 4, i32 3, i32 2, i32 1>
+ %3 = mul <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @mul_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @mul_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: pmulld %xmm1, %xmm0
+ ;CHECK-NEXT: pmulld .LCPI3_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = mul <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %a0
+ %2 = mul <4 x i32> <i32 4, i32 3, i32 2, i32 1>, %a1
+ %3 = mul <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @and_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @and_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: andps %xmm1, %xmm0
+ ;CHECK-NEXT: andps .LCPI4_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
+ %2 = and <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
+ %3 = and <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @and_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @and_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: andps %xmm1, %xmm0
+ ;CHECK-NEXT: andps .LCPI5_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = and <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
+ %2 = and <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
+ %3 = and <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @or_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @or_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: orps %xmm1, %xmm0
+ ;CHECK-NEXT: orps .LCPI6_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = or <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
+ %2 = or <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
+ %3 = or <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @or_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @or_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: orps %xmm1, %xmm0
+ ;CHECK-NEXT: orps .LCPI7_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = or <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
+ %2 = or <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
+ %3 = or <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @xor_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @xor_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: xorps %xmm1, %xmm0
+ ;CHECK-NEXT: xorps .LCPI8_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = xor <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
+ %2 = xor <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
+ %3 = xor <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @xor_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @xor_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: xorps %xmm1, %xmm0
+ ;CHECK-NEXT: xorps .LCPI9_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = xor <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
+ %3 = xor <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
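The new vec_reassociate.ll tests above check that a binary op over (x op C1) and (y op C2) is reassociated so the two constant operands fold into a single constant. A minimal scalar sketch of the intended fold, assuming the usual reassociation combine and not taken from this patch:

define i32 @reassoc_sketch(i32 %x, i32 %y) {
  %1 = mul i32 %x, 3
  %2 = mul i32 %y, 5
  %3 = mul i32 %1, %2
  ; expected to lower as (%x * %y) * 15, i.e. one multiply by a folded constant
  ret i32 %3
}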
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index afde0ed..8ed8083 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -9,7 +9,7 @@
define void @test1() {
;CHECK-LABEL: @test1
-;CHECK: xorpd
+;CHECK: xorps
store <1 x i64> zeroinitializer, <1 x i64>* @M1
store <2 x i32> zeroinitializer, <2 x i32>* @M2
ret void
@@ -17,7 +17,7 @@ define void @test1() {
define void @test2() {
;CHECK-LABEL: @test2
-;CHECK: pshufd
+;CHECK: pcmpeqd
store <1 x i64> < i64 -1 >, <1 x i64>* @M1
store <2 x i32> < i32 -1, i32 -1 >, <2 x i32>* @M2
ret void
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 01b8972..53d13c8 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -634,28 +634,16 @@ define <16 x i8> @PR20540(<8 x i8> %a) {
}
define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
-; SSE2-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE2: # BB#0:
-; SSE2-NEXT: movzbl %dil, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %edi, %xmm0
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE41: # BB#0:
-; SSE41-NEXT: movd %edi, %xmm0
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: retq
+; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE: # BB#0:
+; SSE-NEXT: movzbl %dil, %eax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: movzbl %dil, %eax
+; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -665,27 +653,28 @@ define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE2: # BB#0:
-; SSE2-NEXT: movzbl %dil, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
+; SSE2-NEXT: shll $8, %edi
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pinsrw $2, %edi, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %edi, %xmm0
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: shll $8, %edi
+; SSSE3-NEXT: pxor %xmm0, %xmm0
+; SSSE3-NEXT: pinsrw $2, %edi, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
-; SSE41-NEXT: movd %edi, %xmm0
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: pinsrb $5, %edi, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrb $5, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -693,16 +682,30 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
}
define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
-; SSE-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
-; SSE: # BB#0:
-; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; SSE-NEXT: retq
+; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
+; SSE2: # BB#0:
+; SSE2-NEXT: shll $8, %edi
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pinsrw $7, %edi, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shll $8, %edi
+; SSSE3-NEXT: pxor %xmm0, %xmm0
+; SSSE3-NEXT: pinsrw $7, %edi, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: pinsrb $15, %edi, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrb $15, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
@@ -713,29 +716,27 @@ define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE2: # BB#0:
; SSE2-NEXT: movzbl %dil, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pinsrw $1, %eax, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %edi, %xmm0
-; SSSE3-NEXT: pslld $24, %xmm0
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: movzbl %dil, %eax
+; SSSE3-NEXT: pxor %xmm0, %xmm0
+; SSSE3-NEXT: pinsrw $1, %eax, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
-; SSE41-NEXT: movd %edi, %xmm0
-; SSE41-NEXT: pslld $24, %xmm0
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: pinsrb $2, %edi, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpslld $24, %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrb $2, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 3
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index eb77c38..4007f0b 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -1384,16 +1384,14 @@ define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $1, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $1, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
@@ -1403,16 +1401,14 @@ define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $5, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $5, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
@@ -1422,14 +1418,14 @@ define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
; SSE: # BB#0:
-; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $7, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $7, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
@@ -1439,16 +1435,14 @@ define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $2, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 3
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll
index aad3702..df4994d 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -3249,3 +3249,15 @@ define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_u
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
ret <16 x i16> %shuffle
}
+
+define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
+; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
+; ALL: # BB#0:
+; ALL-NEXT: movzwl (%rdi), %eax
+; ALL-NEXT: vmovd %eax, %xmm0
+; ALL-NEXT: retq
+ %val = load i16, i16* %ptr
+ %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
+ ret <16 x i16> %i0
+}
+
diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll
index f9f4b96..a0f43de 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -656,8 +656,6 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: movl $15, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpblendd $15, %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 8aca67c..1b42a63 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -813,15 +813,11 @@ define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
; AVX1-LABEL: insert_reg_and_zero_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq %rdi, %xmm0
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_and_zero_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq %rdi, %xmm0
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: retq
%v = insertelement <4 x i64> undef, i64 %a, i64 0
%shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -832,15 +828,11 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; AVX1-LABEL: insert_mem_and_zero_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_and_zero_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: retq
%a = load i64, i64* %ptr
%v = insertelement <4 x i64> undef, i64 %a, i64 0
@@ -851,8 +843,9 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; ALL-LABEL: insert_reg_and_zero_v4f64:
; ALL: # BB#0:
-; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; ALL-NEXT: # kill: XMM0<def> XMM0<kill> YMM0<def>
+; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; ALL-NEXT: retq
%v = insertelement <4 x double> undef, double %a, i32 0
%shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll
index 417423a..bb07077 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -133,8 +133,6 @@ define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: movl $7, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -962,8 +960,6 @@ define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: movl $7, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -2090,3 +2086,20 @@ entry:
%res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x float> %res
}
+
+define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
+; AVX1-LABEL: insert_mem_and_zero_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_mem_and_zero_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT: retq
+ %a = load i32, i32* %ptr
+ %v = insertelement <8 x i32> undef, i32 %a, i32 0
+ %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i32> %shuffle
+}
+
diff --git a/test/CodeGen/X86/vector-shuffle-mmx.ll b/test/CodeGen/X86/vector-shuffle-mmx.ll
index 094722d..dbccd26 100644
--- a/test/CodeGen/X86/vector-shuffle-mmx.ll
+++ b/test/CodeGen/X86/vector-shuffle-mmx.ll
@@ -9,7 +9,7 @@ define void @test0(<1 x i64>* %x) {
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X32-NEXT: movlpd %xmm0, (%eax)
+; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test0:
@@ -38,13 +38,13 @@ define void @test1() {
; X32-NEXT: .cfi_def_cfa_offset 24
; X32-NEXT: Ltmp2:
; X32-NEXT: .cfi_offset %edi, -8
-; X32-NEXT: xorpd %xmm0, %xmm0
-; X32-NEXT: movlpd %xmm0, (%esp)
+; X32-NEXT: xorps %xmm0, %xmm0
+; X32-NEXT: movlps %xmm0, (%esp)
; X32-NEXT: movq (%esp), %mm0
; X32-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT: movlpd %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: movq {{[0-9]+}}(%esp), %mm1
; X32-NEXT: xorl %edi, %edi
; X32-NEXT: maskmovq %mm1, %mm0
@@ -54,8 +54,8 @@ define void @test1() {
;
; X64-LABEL: test1:
; X64: ## BB#0: ## %entry
-; X64-NEXT: pxor %xmm0, %xmm0
-; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: movlps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
; X64-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index 6b7f489..b0240dd 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -3,12 +3,14 @@
; CHECK: movl
; CHECK: paddw
-; CHECK: movlpd
+; CHECK: movq
+
+; FIXME - if this test cares about scheduling, why isn't it being checked?
; Scheduler causes produce a different instruction order
; ATOM: movl
; ATOM: paddw
-; ATOM: movlpd
+; ATOM: movq
; bitcast a v4i16 to v2i32
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index 060dfb1..8ed2785 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -52,7 +52,7 @@ forbody: ; preds = %forcond
; CHECK-NEXT: psraw $8
; CHECK-NEXT: psraw $2
; CHECK-NEXT: pshufb
-; CHECK-NEXT: movlpd
+; CHECK-NEXT: movq
;
; FIXME: We shouldn't require both a movd and an insert.
; CHECK-WIDE: %forbody
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
index ccf0bd1..4e9d2df 100644
--- a/test/CodeGen/X86/widen_cast-5.ll
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
; CHECK: movl
-; CHECK: movlpd
+; CHECK: movq
; bitcast a i64 to v2i32
define void @convert(<2 x i32>* %dst.addr, i64 %src) nounwind {
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 2aa870f..3028052 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -84,7 +84,7 @@ define void @shuf5(<8 x i8>* %p) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [33,33,33,33,33,33,33,33]
; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; CHECK-NEXT: movlpd %xmm0, (%eax)
+; CHECK-NEXT: movq %xmm0, (%eax)
; CHECK-NEXT: retl
%v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
store <8 x i8> %v, <8 x i8>* %p, align 8
diff --git a/test/CodeGen/X86/x86-64-asm.ll b/test/CodeGen/X86/x86-64-asm.ll
index 2640e59..f103ab7 100644
--- a/test/CodeGen/X86/x86-64-asm.ll
+++ b/test/CodeGen/X86/x86-64-asm.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
define void @frame_dummy() {
entry:
- %tmp1 = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0,~{dirflag},~{fpsr},~{flags}"( void (i8*)* null ) ; <void (i8*)*> [#uses=0]
+ %tmp1 = tail call void (i8*)* (void (i8*)*) asm "", "=r,0,~{dirflag},~{fpsr},~{flags}"( void (i8*)* null ) ; <void (i8*)*> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/x86-64-tls-1.ll b/test/CodeGen/X86/x86-64-tls-1.ll
index 2879fb4..2c954db 100644
--- a/test/CodeGen/X86/x86-64-tls-1.ll
+++ b/test/CodeGen/X86/x86-64-tls-1.ll
@@ -3,7 +3,7 @@
define i64 @z() nounwind {
; CHECK: movq $tm_nest_level@TPOFF, %r[[R0:[abcd]]]x
; CHECK-NEXT: addl %fs:0, %e[[R0]]x
-; CHECK-NEXT: andq $100, %r[[R0]]x
+; CHECK-NEXT: andl $100, %e[[R0]]x
ret i64 and (i64 ptrtoint (i32* @tm_nest_level to i64), i64 100)
}
diff --git a/test/CodeGen/X86/x86-64-varargs.ll b/test/CodeGen/X86/x86-64-varargs.ll
index f40e02f..ed07bde 100644
--- a/test/CodeGen/X86/x86-64-varargs.ll
+++ b/test/CodeGen/X86/x86-64-varargs.ll
@@ -6,6 +6,6 @@ declare i32 @printf(i8*, ...) nounwind
define i32 @main() nounwind {
entry:
- %tmp10.i = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([26 x i8], [26 x i8]* @.str, i32 0, i64 0), i32 12, double 0x3FF3EB8520000000, i32 120, i64 123456677890, i32 -10, double 4.500000e+15 ) nounwind ; <i32> [#uses=0]
+ %tmp10.i = tail call i32 (i8*, ...) @printf( i8* getelementptr ([26 x i8], [26 x i8]* @.str, i32 0, i64 0), i32 12, double 0x3FF3EB8520000000, i32 120, i64 123456677890, i32 -10, double 4.500000e+15 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/X86/xmulo.ll b/test/CodeGen/X86/xmulo.ll
index ebc1907..825efa6 100644
--- a/test/CodeGen/X86/xmulo.ll
+++ b/test/CodeGen/X86/xmulo.ll
@@ -17,7 +17,7 @@ define i32 @t1() nounwind {
%2 = extractvalue {i64, i1} %1, 0
%3 = extractvalue {i64, i1} %1, 1
%4 = zext i1 %3 to i32
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i64 %2, i32 %4)
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i64 %2, i32 %4)
ret i32 0
}
@@ -31,7 +31,7 @@ define i32 @t2() nounwind {
%2 = extractvalue {i64, i1} %1, 0
%3 = extractvalue {i64, i1} %1, 1
%4 = zext i1 %3 to i32
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i64 %2, i32 %4)
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i64 %2, i32 %4)
ret i32 0
}
@@ -45,6 +45,6 @@ define i32 @t3() nounwind {
%2 = extractvalue {i64, i1} %1, 0
%3 = extractvalue {i64, i1} %1, 1
%4 = zext i1 %3 to i32
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i64 %2, i32 %4)
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i64 %2, i32 %4)
ret i32 0
}
diff --git a/test/CodeGen/X86/xor-icmp.ll b/test/CodeGen/X86/xor-icmp.ll
index dd1fcca..397e5bc 100644
--- a/test/CodeGen/X86/xor-icmp.ll
+++ b/test/CodeGen/X86/xor-icmp.ll
@@ -24,11 +24,11 @@ entry:
br i1 %4, label %bb1, label %bb
bb: ; preds = %entry
- %5 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=1]
+ %5 = tail call i32 (...) @foo() nounwind ; <i32> [#uses=1]
ret i32 %5
bb1: ; preds = %entry
- %6 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1]
+ %6 = tail call i32 (...) @bar() nounwind ; <i32> [#uses=1]
ret i32 %6
}
@@ -59,7 +59,7 @@ entry:
br i1 %2, label %bb, label %return
bb: ; preds = %entry
- %3 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=0]
+ %3 = tail call i32 (...) @foo() nounwind ; <i32> [#uses=0]
ret i32 undef
return: ; preds = %entry
diff --git a/test/CodeGen/XCore/llvm-intrinsics.ll b/test/CodeGen/XCore/llvm-intrinsics.ll
index 539bf19..b7868d3 100644
--- a/test/CodeGen/XCore/llvm-intrinsics.ll
+++ b/test/CodeGen/XCore/llvm-intrinsics.ll
@@ -122,7 +122,7 @@ entry:
; CHECK-NEXT: ldw r0, sp[2]
; CHECK-NEXT: set sp, r2
; CHECK-NEXT: bau r3
- call void (...)* @foo()
+ call void (...) @foo()
call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
unreachable
}
@@ -144,7 +144,7 @@ entry:
; CHECK-NEXT: ldw r0, sp[2]
; CHECK-NEXT: set sp, r2
; CHECK-NEXT: bau r3
- call void (...)* @foo()
+ call void (...) @foo()
%0 = load i32, i32* @offset
call void @llvm.eh.return.i32(i32 %0, i8* @handler)
unreachable
@@ -244,7 +244,7 @@ define void @Unwind0() {
; CHECK: ldw r4, sp[9]
; CHECK: retsp 10
define void @Unwind1() {
- call void (...)* @foo()
+ call void (...) @foo()
call void @llvm.eh.unwind.init()
ret void
}
diff --git a/test/DebugInfo/2009-11-03-InsertExtractValue.ll b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
index 846f38e..f816e72 100644
--- a/test/DebugInfo/2009-11-03-InsertExtractValue.ll
+++ b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
@@ -4,12 +4,12 @@
!llvm.dbg.cu = !{!5}
!llvm.module.flags = !{!6}
-!0 = !MDSubprogram(name: "bar", linkageName: "_ZN3foo3barEv", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagProtected | DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !4, scope: !1, type: !2, variables: !1)
+!0 = !MDSubprogram(name: "bar", linkageName: "_ZN3foo3barEv", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagProtected | DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !4, scope: !1, type: !2)
!1 = !MDFile(filename: "/foo", directory: "bar.cpp")
!2 = !MDSubroutineType(types: !3)
!3 = !{null}
!4 = !MDFile(filename: "/foo", directory: "bar.cpp")
-!5 = !MDCompileUnit(language: DW_LANG_C99, isOptimized: true, emissionKind: 0, file: !4, enums: !3, retainedTypes: !3)
+!5 = !MDCompileUnit(language: DW_LANG_C99, isOptimized: true, emissionKind: 0, file: !4, enums: !{}, retainedTypes: !{})
define <{i32, i32}> @f1() {
; CHECK: !dbgx ![[NUMBER:[0-9]+]]
diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
index 266b91d..eb0777d 100644
--- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
+++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
@@ -11,7 +11,7 @@ entry:
!llvm.module.flags = !{!18}
!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 139632)", isOptimized: true, emissionKind: 0, file: !17, enums: !1, retainedTypes: !1, subprograms: !3, globals: !12)
-!1 = !{i32 0}
+!1 = !{}
!3 = !{!5}
!5 = !MDSubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !17, scope: !6, type: !7, function: i32 ()* @foo)
!6 = !MDFile(filename: "fb.c", directory: "/private/tmp")
diff --git a/test/DebugInfo/2009-11-10-CurrentFn.ll b/test/DebugInfo/2009-11-10-CurrentFn.ll
index 2fc710a..f6255ec 100644
--- a/test/DebugInfo/2009-11-10-CurrentFn.ll
+++ b/test/DebugInfo/2009-11-10-CurrentFn.ll
@@ -2,7 +2,7 @@
define void @bar(i32 %i) nounwind uwtable ssp {
entry:
- tail call void (...)* @foo() nounwind, !dbg !14
+ tail call void (...) @foo() nounwind, !dbg !14
ret void, !dbg !16
}
diff --git a/test/DebugInfo/2010-01-05-DbgScope.ll b/test/DebugInfo/2010-01-05-DbgScope.ll
index 39260bf..19c9335 100644
--- a/test/DebugInfo/2010-01-05-DbgScope.ll
+++ b/test/DebugInfo/2010-01-05-DbgScope.ll
@@ -20,6 +20,6 @@ entry:
!6 = !MDBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!10 = !MDLocation(line: 588, column: 1, scope: !2)
!11 = !MDFile(filename: "hashtab.c", directory: "/usr/src/gnu/usr.bin/cc/cc_tools/../../../../contrib/gcclibs/libiberty")
-!12 = !{i32 0}
+!12 = !{}
!13 = !{!2}
!14 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-03-12-llc-crash.ll b/test/DebugInfo/2010-03-12-llc-crash.ll
index 642db2c..57f96e3 100644
--- a/test/DebugInfo/2010-03-12-llc-crash.ll
+++ b/test/DebugInfo/2010-03-12-llc-crash.ll
@@ -5,7 +5,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
define void @foo() {
entry:
- call void @llvm.dbg.declare(metadata i32* undef, metadata !0, metadata !MDExpression())
+ call void @llvm.dbg.declare(metadata i32* undef, metadata !0, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
ret void
}
diff --git a/test/DebugInfo/2010-03-19-DbgDeclare.ll b/test/DebugInfo/2010-03-19-DbgDeclare.ll
index a8ad499..dff5489 100644
--- a/test/DebugInfo/2010-03-19-DbgDeclare.ll
+++ b/test/DebugInfo/2010-03-19-DbgDeclare.ll
@@ -4,7 +4,7 @@
define void @Foo(i32 %a, i32 %b) {
entry:
- call void @llvm.dbg.declare(metadata i32* null, metadata !1, metadata !MDExpression())
+ call void @llvm.dbg.declare(metadata i32* null, metadata !1, metadata !MDExpression()), !dbg !MDLocation(scope: !6)
ret void
}
!llvm.dbg.cu = !{!2}
diff --git a/test/DebugInfo/2010-03-24-MemberFn.ll b/test/DebugInfo/2010-03-24-MemberFn.ll
index 696d655..f925297 100644
--- a/test/DebugInfo/2010-03-24-MemberFn.ll
+++ b/test/DebugInfo/2010-03-24-MemberFn.ll
@@ -66,5 +66,5 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!24 = !{!3, !12}
!25 = !MDFile(filename: "one.cc", directory: "/tmp/")
!26 = !MDFile(filename: "one.h", directory: "/tmp/")
-!27 = !{i32 0}
+!27 = !{}
!28 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
index 3ce4118..9683c1b 100644
--- a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
+++ b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
@@ -108,5 +108,5 @@ entry:
!35 = !MDLocation(line: 7, column: 19, scope: !36)
!36 = distinct !MDLexicalBlock(line: 7, column: 17, file: !38, scope: !23)
!38 = !MDFile(filename: "one.cc", directory: "/tmp")
-!39 = !{i32 0}
+!39 = !{}
!40 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-04-19-FramePtr.ll b/test/DebugInfo/2010-04-19-FramePtr.ll
index 60e46b5..0bded546 100644
--- a/test/DebugInfo/2010-04-19-FramePtr.ll
+++ b/test/DebugInfo/2010-04-19-FramePtr.ll
@@ -33,5 +33,5 @@ return: ; preds = %entry
!7 = !MDLocation(line: 2, scope: !8)
!8 = distinct !MDLexicalBlock(line: 2, column: 0, file: !10, scope: !1)
!10 = !MDFile(filename: "a.c", directory: "/tmp")
-!11 = !{i32 0}
+!11 = !{}
!12 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-05-03-DisableFramePtr.ll b/test/DebugInfo/2010-05-03-DisableFramePtr.ll
index 3e01ded..e87b51e 100644
--- a/test/DebugInfo/2010-05-03-DisableFramePtr.ll
+++ b/test/DebugInfo/2010-05-03-DisableFramePtr.ll
@@ -35,6 +35,6 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!14 = !MDLocation(line: 8, scope: !15)
!15 = distinct !MDLexicalBlock(line: 7, column: 0, file: !16, scope: !1)
!16 = !MDFile(filename: "t.c", directory: "/Users/echeng/LLVM/radars/r7937664/")
-!17 = !{i32 0}
+!17 = !{}
!18 = !{!1}
!19 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-05-03-OriginDIE.ll b/test/DebugInfo/2010-05-03-OriginDIE.ll
index a5b7819..914e781 100644
--- a/test/DebugInfo/2010-05-03-OriginDIE.ll
+++ b/test/DebugInfo/2010-05-03-OriginDIE.ll
@@ -25,10 +25,10 @@ entry:
%a12 = load i64, i64* %a11, align 4, !dbg !7 ; <i64> [#uses=1]
call void @llvm.dbg.declare(metadata i64* %data_addr.i17, metadata !8, metadata !MDExpression()) nounwind, !dbg !14
store i64 %a12, i64* %data_addr.i17, align 8
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !15, metadata !MDExpression()) nounwind
- call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !19, metadata !MDExpression()) nounwind
- call void @llvm.dbg.declare(metadata !6, metadata !23, metadata !MDExpression()) nounwind
- call void @llvm.dbg.value(metadata i64* %data_addr.i17, i64 0, metadata !34, metadata !MDExpression()) nounwind
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !15, metadata !MDExpression()) nounwind, !dbg !MDLocation(scope: !16)
+ call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !19, metadata !MDExpression()) nounwind, !dbg !MDLocation(scope: !16)
+ call void @llvm.dbg.declare(metadata !6, metadata !23, metadata !MDExpression()) nounwind, !dbg !MDLocation(scope: !24)
+ call void @llvm.dbg.value(metadata i64* %data_addr.i17, i64 0, metadata !34, metadata !MDExpression()) nounwind, !dbg !MDLocation(scope: !24)
%a13 = load volatile i64, i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
%a14 = call i64 @llvm.bswap.i64(i64 %a13) nounwind ; <i64> [#uses=2]
%a15 = add i64 %a10, %a14, !dbg !7 ; <i64> [#uses=1]
@@ -68,7 +68,7 @@ declare void @uuid_LtoB(i8*, i8*)
!15 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "base", line: 92, arg: 0, scope: !16, file: !10, type: !17)
!16 = !MDSubprogram(name: "OSReadSwapInt64", linkageName: "OSReadSwapInt64", line: 95, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !38, scope: null, type: !5)
!17 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !39, scope: !3, baseType: null)
-!18 = !{i32 0}
+!18 = !{}
!19 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "byteOffset", line: 94, arg: 0, scope: !16, file: !10, type: !20)
!20 = !MDDerivedType(tag: DW_TAG_typedef, name: "uintptr_t", line: 114, file: !37, scope: !3, baseType: !22)
!21 = !MDFile(filename: "types.h", directory: "/usr/include/ppc")
diff --git a/test/DebugInfo/2010-05-10-MultipleCU.ll b/test/DebugInfo/2010-05-10-MultipleCU.ll
index 6ccab85..3430e86 100644
--- a/test/DebugInfo/2010-05-10-MultipleCU.ll
+++ b/test/DebugInfo/2010-05-10-MultipleCU.ll
@@ -39,6 +39,6 @@ return:
!14 = !{!15}
!15 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!18 = !MDFile(filename: "a.c", directory: "/tmp/")
-!19 = !{i32 0}
+!19 = !{}
!20 = !MDFile(filename: "b.c", directory: "/tmp/")
!21 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
index 2814d36..ae05e47 100644
--- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
+++ b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
@@ -15,8 +15,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
define i32 @bar() nounwind ssp {
entry:
%0 = load i32, i32* @i, align 4, !dbg !17 ; <i32> [#uses=2]
- tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !9, metadata !MDExpression()), !dbg !19
- tail call void @llvm.dbg.declare(metadata !29, metadata !10, metadata !MDExpression()), !dbg !21
+ tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !59, metadata !MDExpression()), !dbg !19
+ tail call void @llvm.dbg.declare(metadata !29, metadata !60, metadata !MDExpression()), !dbg !21
%1 = mul nsw i32 %0, %0, !dbg !22 ; <i32> [#uses=2]
store i32 %1, i32* @i, align 4, !dbg !17
ret i32 %1, !dbg !23
@@ -36,6 +36,10 @@ entry:
!8 = !{!5}
!9 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 9, arg: 0, scope: !0, file: !1, type: !5)
!10 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 10, scope: !11, file: !1, type: !12)
+
+!59 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 9, arg: 0, scope: !0, file: !1, type: !5)
+!60 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 10, scope: !11, file: !1, type: !12)
+
!11 = distinct !MDLexicalBlock(line: 9, column: 0, file: !1, scope: !0)
!12 = !MDCompositeType(tag: DW_TAG_structure_type, name: "X", line: 10, size: 64, align: 32, file: !27, scope: !0, elements: !13)
!13 = !{!14, !15}
diff --git a/test/DebugInfo/2010-07-19-Crash.ll b/test/DebugInfo/2010-07-19-Crash.ll
index 3e28929..67af58d 100644
--- a/test/DebugInfo/2010-07-19-Crash.ll
+++ b/test/DebugInfo/2010-07-19-Crash.ll
@@ -26,5 +26,5 @@ entry:
!11 = !MDSubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: true, isDefinition: false, virtualIndex: 6, isOptimized: true, file: !12, scope: !1, type: !3)
!12 = !MDFile(filename: "one.c", directory: "/private/tmp")
!13 = !{!0}
-!14 = !{i32 0}
+!14 = !{}
!15 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-10-01-crash.ll b/test/DebugInfo/2010-10-01-crash.ll
index c8a228d..78d6ab9 100644
--- a/test/DebugInfo/2010-10-01-crash.ll
+++ b/test/DebugInfo/2010-10-01-crash.ll
@@ -22,5 +22,5 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
!23 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "rect", line: 53, arg: 0, scope: !0, file: !1, type: !5)
!24 = !MDLocation(line: 53, column: 33, scope: !0)
!25 = !MDFile(filename: "GSFusedSilica.m", directory: "/Volumes/Data/Users/sabre/Desktop")
-!26 = !{i32 0}
+!26 = !{}
!27 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/AArch64/cfi-eof-prologue.ll b/test/DebugInfo/AArch64/cfi-eof-prologue.ll
index a20e2c5..ab4ef4e 100644
--- a/test/DebugInfo/AArch64/cfi-eof-prologue.ll
+++ b/test/DebugInfo/AArch64/cfi-eof-prologue.ll
@@ -106,7 +106,7 @@ attributes #3 = { nounwind }
!42 = !{!"vtable pointer", !43, i64 0}
!43 = !{!"Simple C/C++ TBAA"}
!44 = !MDLocation(line: 0, scope: !32)
-!45 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31, inlinedAt: !46)
+!45 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31)
!46 = !MDLocation(line: 9, scope: !32)
!47 = !MDLocation(line: 0, scope: !28, inlinedAt: !46)
!48 = !MDLocation(line: 9, scope: !28, inlinedAt: !46)
diff --git a/test/DebugInfo/AArch64/frameindices.ll b/test/DebugInfo/AArch64/frameindices.ll
index 71ca4b8..51424a0 100644
--- a/test/DebugInfo/AArch64/frameindices.ll
+++ b/test/DebugInfo/AArch64/frameindices.ll
@@ -234,7 +234,7 @@ attributes #5 = { builtin }
!71 = !MDLocation(line: 15, column: 3, scope: !25, inlinedAt: !66)
!72 = !MDLocation(line: 16, column: 1, scope: !25, inlinedAt: !66)
!73 = !MDLocation(line: 17, column: 27, scope: !31)
-!74 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 17, arg: 1, scope: !31, file: !26, type: !"_ZTS1A", inlinedAt: !75)
+!74 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 17, arg: 1, scope: !31, file: !26, type: !"_ZTS1A")
!75 = distinct !MDLocation(line: 22, column: 3, scope: !34)
!76 = !MDExpression(DW_OP_bit_piece, 8, 120)
!77 = !MDLocation(line: 17, column: 12, scope: !31, inlinedAt: !75)
diff --git a/test/DebugInfo/ARM/cfi-eof-prologue.ll b/test/DebugInfo/ARM/cfi-eof-prologue.ll
index d32e8f7..fd91f92 100644
--- a/test/DebugInfo/ARM/cfi-eof-prologue.ll
+++ b/test/DebugInfo/ARM/cfi-eof-prologue.ll
@@ -109,7 +109,7 @@ attributes #3 = { nounwind }
!44 = !{!"vtable pointer", !45, i64 0}
!45 = !{!"Simple C/C++ TBAA"}
!46 = !MDLocation(line: 0, scope: !32)
-!47 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31, inlinedAt: !48)
+!47 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31)
!48 = !MDLocation(line: 9, scope: !32)
!49 = !MDLocation(line: 0, scope: !28, inlinedAt: !48)
!50 = !MDLocation(line: 9, scope: !28, inlinedAt: !48)
diff --git a/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll b/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
index f3e00b5..f2f4cfd 100644
--- a/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
+++ b/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
@@ -69,7 +69,7 @@ attributes #1 = { nounwind readnone }
!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !MDFile(filename: "<unknown>", directory: "/Volumes/Data/radar/15464571")
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!4}
!4 = !MDSubprogram(name: "run", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !5, scope: !6, type: !7, function: void (float)* @run, variables: !10)
!5 = !MDFile(filename: "test.c", directory: "/Volumes/Data/radar/15464571")
diff --git a/test/DebugInfo/ARM/s-super-register.ll b/test/DebugInfo/ARM/s-super-register.ll
index 31b0b2e..b47566c 100644
--- a/test/DebugInfo/ARM/s-super-register.ll
+++ b/test/DebugInfo/ARM/s-super-register.ll
@@ -59,5 +59,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!16 = !{!1}
!17 = !{!5, !8}
!18 = !MDFile(filename: "k.cc", directory: "/private/tmp")
-!19 = !{i32 0}
+!19 = !{}
!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Mips/InlinedFnLocalVar.ll b/test/DebugInfo/Mips/InlinedFnLocalVar.ll
index 6cef6b4..48344a9 100644
--- a/test/DebugInfo/Mips/InlinedFnLocalVar.ll
+++ b/test/DebugInfo/Mips/InlinedFnLocalVar.ll
@@ -15,8 +15,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
define i32 @bar() nounwind ssp {
entry:
%0 = load i32, i32* @i, align 4, !dbg !17 ; <i32> [#uses=2]
- tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !9, metadata !MDExpression()), !dbg !19
- tail call void @llvm.dbg.declare(metadata !29, metadata !10, metadata !MDExpression()), !dbg !21
+ tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !59, metadata !MDExpression()), !dbg !19
+ tail call void @llvm.dbg.declare(metadata !29, metadata !60, metadata !MDExpression()), !dbg !21
%1 = mul nsw i32 %0, %0, !dbg !22 ; <i32> [#uses=2]
store i32 %1, i32* @i, align 4, !dbg !17
ret i32 %1, !dbg !23
@@ -36,6 +36,10 @@ entry:
!8 = !{!5}
!9 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 9, arg: 0, scope: !0, file: !1, type: !5)
!10 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 10, scope: !11, file: !1, type: !12)
+
+!59 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 9, arg: 0, scope: !0, file: !1, type: !5)
+!60 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 10, scope: !11, file: !1, type: !12)
+
!11 = distinct !MDLexicalBlock(line: 9, column: 0, file: !1, scope: !0)
!12 = !MDCompositeType(tag: DW_TAG_structure_type, name: "X", line: 10, size: 64, align: 32, file: !27, scope: !0, elements: !13)
!13 = !{!14, !15}
diff --git a/test/DebugInfo/Mips/fn-call-line.ll b/test/DebugInfo/Mips/fn-call-line.ll
index 2252c20..ad90fa8 100644
--- a/test/DebugInfo/Mips/fn-call-line.ll
+++ b/test/DebugInfo/Mips/fn-call-line.ll
@@ -54,8 +54,8 @@
; Function Attrs: nounwind uwtable
define void @f2() #0 {
entry:
- call void (...)* @f1(), !dbg !11
- call void (...)* @f1(), !dbg !12
+ call void (...) @f1(), !dbg !11
+ call void (...) @f1(), !dbg !12
ret void, !dbg !13
}
diff --git a/test/DebugInfo/PR20038.ll b/test/DebugInfo/PR20038.ll
index b7ac893..6125565 100644
--- a/test/DebugInfo/PR20038.ll
+++ b/test/DebugInfo/PR20038.ll
@@ -74,10 +74,10 @@ land.end: ; preds = %land.rhs, %entry
cleanup.action: ; preds = %land.end
store %struct.C* %agg.tmp.ensured, %struct.C** %this.addr.i, align 8, !dbg !22
- call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i, metadata !29, metadata !MDExpression()), !dbg !31
+ call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i, metadata !129, metadata !MDExpression()), !dbg !31
%this1.i = load %struct.C*, %struct.C** %this.addr.i, !dbg !22
store %struct.C* %this1.i, %struct.C** %this.addr.i.i, align 8, !dbg !21
- call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i.i, metadata !32, metadata !MDExpression()), !dbg !33
+ call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i.i, metadata !132, metadata !MDExpression()), !dbg !33
%this1.i.i = load %struct.C*, %struct.C** %this.addr.i.i, !dbg !21
br label %cleanup.done, !dbg !22
@@ -94,7 +94,7 @@ entry:
call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !29, metadata !MDExpression()), !dbg !38
%this1 = load %struct.C*, %struct.C** %this.addr
store %struct.C* %this1, %struct.C** %this.addr.i, align 8, !dbg !37
- call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i, metadata !32, metadata !MDExpression()), !dbg !39
+ call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i, metadata !232, metadata !MDExpression()), !dbg !39
%this1.i = load %struct.C*, %struct.C** %this.addr.i, !dbg !37
ret void, !dbg !37
}
@@ -154,6 +154,11 @@ attributes #2 = { nounwind readnone }
!31 = !MDLocation(line: 0, scope: !17, inlinedAt: !22)
!32 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !16, type: !30)
!33 = !MDLocation(line: 0, scope: !16, inlinedAt: !21)
+
+!129 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !17, type: !30)
+!132 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !16, type: !30)
+!232 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !16, type: !30)
+
!34 = !MDLocation(line: 5, scope: !35)
!35 = distinct !MDLexicalBlock(line: 5, column: 0, file: !5, scope: !36)
!36 = distinct !MDLexicalBlock(line: 5, column: 0, file: !5, scope: !12)
diff --git a/test/DebugInfo/Sparc/gnu-window-save.ll b/test/DebugInfo/Sparc/gnu-window-save.ll
index 73c5b87..f18c6ce 100644
--- a/test/DebugInfo/Sparc/gnu-window-save.ll
+++ b/test/DebugInfo/Sparc/gnu-window-save.ll
@@ -42,7 +42,7 @@ define signext i32 @main() #0 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- %call = call signext i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i32 0, i32 0)), !dbg !12
+ %call = call signext i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i32 0, i32 0)), !dbg !12
ret i32 0, !dbg !13
}
diff --git a/test/DebugInfo/SystemZ/variable-loc.ll b/test/DebugInfo/SystemZ/variable-loc.ll
index 686380a..e7987fc 100644
--- a/test/DebugInfo/SystemZ/variable-loc.ll
+++ b/test/DebugInfo/SystemZ/variable-loc.ll
@@ -43,7 +43,7 @@ entry:
%call = call i32 @sum_array(i32* %arraydecay1, i32 100), !dbg !26
store i32 %call, i32* %val, align 4, !dbg !26
%0 = load i32, i32* %val, align 4, !dbg !27
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %0), !dbg !27
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %0), !dbg !27
ret i32 0, !dbg !28
}
@@ -69,7 +69,7 @@ declare i32 @printf(i8*, ...)
!16 = !{!10}
!17 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "main_arr", line: 19, scope: !18, file: !6, type: !19)
!18 = distinct !MDLexicalBlock(line: 18, column: 16, file: !29, scope: !14)
-!19 = !MDCompositeType(tag: DW_TAG_array_type, size: 3200, align: 32, baseType: !10, elements: !20)
+!19 = !MDCompositeType(tag: DW_TAG_array_type, size: 3200, align: 32, baseType: !10, elements: !{!20})
!20 = !MDSubrange(count: 99)
!22 = !MDLocation(line: 19, column: 7, scope: !18)
!23 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "val", line: 20, scope: !18, file: !6, type: !10)
diff --git a/test/DebugInfo/X86/2010-04-13-PubType.ll b/test/DebugInfo/X86/2010-04-13-PubType.ll
index fb611fd..c990bfc 100644
--- a/test/DebugInfo/X86/2010-04-13-PubType.ll
+++ b/test/DebugInfo/X86/2010-04-13-PubType.ll
@@ -50,5 +50,5 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!16 = distinct !MDLexicalBlock(line: 7, column: 0, file: !18, scope: !1)
!17 = !{!1}
!18 = !MDFile(filename: "a.c", directory: "/tmp/")
-!19 = !{i32 0}
+!19 = !{}
!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/DW_AT_linkage_name.ll b/test/DebugInfo/X86/DW_AT_linkage_name.ll
index 30a7aeb..daad4ab 100644
--- a/test/DebugInfo/X86/DW_AT_linkage_name.ll
+++ b/test/DebugInfo/X86/DW_AT_linkage_name.ll
@@ -83,16 +83,14 @@ attributes #2 = { ssp uwtable }
!3 = !{!4}
!4 = !MDCompositeType(tag: DW_TAG_structure_type, name: "A", line: 1, size: 8, align: 8, file: !1, elements: !5, identifier: "_ZTS1A")
!5 = !{!6, !12}
-!6 = !MDSubprogram(name: "A", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !"_ZTS1A", type: !7, variables: !11)
+!6 = !MDSubprogram(name: "A", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !"_ZTS1A", type: !7)
!7 = !MDSubroutineType(types: !8)
!8 = !{null, !9, !10}
!9 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
!10 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!11 = !{i32 786468}
-!12 = !MDSubprogram(name: "~A", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !"_ZTS1A", type: !13, variables: !15)
+!12 = !MDSubprogram(name: "~A", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !"_ZTS1A", type: !13)
!13 = !MDSubroutineType(types: !14)
!14 = !{null, !9}
-!15 = !{i32 786468}
!16 = !{!17, !18, !19}
!17 = !MDSubprogram(name: "~A", linkageName: "_ZN1AD2Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !"_ZTS1A", type: !13, function: void (%struct.A*)* @_ZN1AD2Ev, declaration: !12, variables: !2)
!18 = !MDSubprogram(name: "~A", linkageName: "_ZN1AD1Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !"_ZTS1A", type: !13, function: void (%struct.A*)* @_ZN1AD1Ev, declaration: !12, variables: !2)
diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll
index 0e2dbdb..82ed0a9 100644
--- a/test/DebugInfo/X86/DW_AT_location-reference.ll
+++ b/test/DebugInfo/X86/DW_AT_location-reference.ll
@@ -104,7 +104,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!0 = !MDSubprogram(name: "f", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !23, scope: !1, type: !3, function: void ()* @f, variables: !22)
!1 = !MDFile(filename: "simple.c", directory: "/home/rengol01/temp/tests/dwarf/relocation")
-!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk)", isOptimized: true, emissionKind: 1, file: !23, enums: !4, retainedTypes: !4, subprograms: !21, imports: null)
+!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk)", isOptimized: true, emissionKind: 1, file: !23, enums: !{}, retainedTypes: !{}, subprograms: !21, imports: null)
!3 = !MDSubroutineType(types: !4)
!4 = !{null}
!5 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 5, scope: !6, file: !1, type: !7)
diff --git a/test/DebugInfo/X86/DW_AT_object_pointer.ll b/test/DebugInfo/X86/DW_AT_object_pointer.ll
index 099d489..b8fa8b8 100644
--- a/test/DebugInfo/X86/DW_AT_object_pointer.ll
+++ b/test/DebugInfo/X86/DW_AT_object_pointer.ll
@@ -66,9 +66,7 @@ entry:
!14 = !MDCompositeType(tag: DW_TAG_class_type, name: "A", line: 1, size: 32, align: 32, file: !37, elements: !15)
!15 = !{!16, !17}
!16 = !MDDerivedType(tag: DW_TAG_member, name: "m_a", line: 4, size: 32, align: 32, file: !37, scope: !14, baseType: !9)
-!17 = !MDSubprogram(name: "A", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: !14, type: !11, variables: !18)
-!18 = !{!19}
-!19 = !{} ; previously: invalid DW_TAG_base_type
+!17 = !MDSubprogram(name: "A", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: !14, type: !11)
!20 = !MDSubprogram(name: "A", linkageName: "_ZN1AC2Ev", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: null, type: !11, function: void (%class.A*)* @_ZN1AC2Ev, declaration: !17, variables: !1)
!21 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 8, scope: !22, file: !6, type: !14)
!22 = distinct !MDLexicalBlock(line: 7, column: 11, file: !6, scope: !5)
diff --git a/test/DebugInfo/X86/DW_TAG_friend.ll b/test/DebugInfo/X86/DW_TAG_friend.ll
index 78f9be5..a90687a 100644
--- a/test/DebugInfo/X86/DW_TAG_friend.ll
+++ b/test/DebugInfo/X86/DW_TAG_friend.ll
@@ -27,22 +27,18 @@
!8 = !{!9, !11}
!9 = !MDDerivedType(tag: DW_TAG_member, name: "a", line: 2, size: 32, align: 32, flags: DIFlagPrivate, file: !28, scope: !7, baseType: !10)
!10 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!11 = !MDSubprogram(name: "A", line: 1, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !6, scope: !7, type: !12, variables: !15)
+!11 = !MDSubprogram(name: "A", line: 1, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !6, scope: !7, type: !12)
!12 = !MDSubroutineType(types: !13)
!13 = !{null, !14}
!14 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, baseType: !7)
-!15 = !{!16}
-!16 = !{} ; previously: invalid DW_TAG_base_type
!17 = !MDGlobalVariable(name: "b", line: 11, isLocal: false, isDefinition: true, scope: null, file: !6, type: !18, variable: %class.B* @b)
!18 = !MDCompositeType(tag: DW_TAG_class_type, name: "B", line: 5, size: 32, align: 32, file: !28, elements: !19)
!19 = !{!20, !21, !27}
!20 = !MDDerivedType(tag: DW_TAG_member, name: "b", line: 7, size: 32, align: 32, flags: DIFlagPrivate, file: !28, scope: !18, baseType: !10)
-!21 = !MDSubprogram(name: "B", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !6, scope: !18, type: !22, variables: !25)
+!21 = !MDSubprogram(name: "B", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !6, scope: !18, type: !22)
!22 = !MDSubroutineType(types: !23)
!23 = !{null, !24}
!24 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, baseType: !18)
-!25 = !{!26}
-!26 = !{} ; previously: invalid DW_TAG_base_type
-!27 = !MDDerivedType(tag: DW_TAG_friend, file: !18, baseType: !7)
+!27 = !MDDerivedType(tag: DW_TAG_friend, file: !28, baseType: !7)
!28 = !MDFile(filename: "foo.cpp", directory: "/Users/echristo/tmp")
!29 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/InlinedFnLocalVar.ll b/test/DebugInfo/X86/InlinedFnLocalVar.ll
index c5be61f..3832e4f 100644
--- a/test/DebugInfo/X86/InlinedFnLocalVar.ll
+++ b/test/DebugInfo/X86/InlinedFnLocalVar.ll
@@ -15,8 +15,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
define i32 @bar() nounwind ssp {
entry:
%0 = load i32, i32* @i, align 4, !dbg !17 ; <i32> [#uses=2]
- tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !9, metadata !MDExpression()), !dbg !19
- tail call void @llvm.dbg.declare(metadata !29, metadata !10, metadata !MDExpression()), !dbg !21
+ tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !109, metadata !MDExpression()), !dbg !19
+ tail call void @llvm.dbg.declare(metadata !29, metadata !110, metadata !MDExpression()), !dbg !21
%1 = mul nsw i32 %0, %0, !dbg !22 ; <i32> [#uses=2]
store i32 %1, i32* @i, align 4, !dbg !17
ret i32 %1, !dbg !23
@@ -36,6 +36,10 @@ entry:
!8 = !{!5}
!9 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 9, arg: 0, scope: !0, file: !1, type: !5)
!10 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 10, scope: !11, file: !1, type: !12)
+
+!109 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 9, arg: 0, scope: !0, file: !1, type: !5)
+!110 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 10, scope: !11, file: !1, type: !12)
+
!11 = distinct !MDLexicalBlock(line: 9, column: 0, file: !1, scope: !0)
!12 = !MDCompositeType(tag: DW_TAG_structure_type, name: "X", line: 10, size: 64, align: 32, file: !27, scope: !0, elements: !13)
!13 = !{!14, !15}
diff --git a/test/DebugInfo/X86/arange-and-stub.ll b/test/DebugInfo/X86/arange-and-stub.ll
new file mode 100644
index 0000000..1952432
--- /dev/null
+++ b/test/DebugInfo/X86/arange-and-stub.ll
@@ -0,0 +1,53 @@
+; RUN: llc -generate-arange-section -relocation-model=pic < %s | FileCheck %s
+
+; CHECK: .section .data.rel,"aw",@progbits
+; CHECK-NOT: .section
+; CHECK: .L_ZTId.DW.stub:
+
+; CHECK: .section .data.rel,"aw",@progbits
+; CHECK-NEXT: .Lsec_end0:
+
+target triple = "x86_64-linux-gnu"
+
+@_ZTId = external constant i8*
+@zed = global [1 x void ()*] [void ()* @bar]
+
+define void @foo() {
+ ret void
+}
+
+define void @bar() {
+ invoke void @foo()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %0
+ ret void
+
+lpad: ; preds = %0
+ %tmp1 = landingpad { i8*, i32 } personality i8* bitcast (void ()* @foo to i8*)
+ filter [1 x i8*] [i8* bitcast (i8** @_ZTId to i8*)]
+ ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !18}
+
+!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.7.0 (trunk 234308) (llvm/trunk 234310)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !10, imports: !2)
+!1 = !MDFile(filename: "/Users/espindola/llvm/<stdin>", directory: "/Users/espindola/llvm/build")
+!2 = !{}
+!3 = !{!4, !9}
+!4 = !MDSubprogram(name: "foo", linkageName: "foo", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @foo, variables: !2)
+!5 = !MDFile(filename: "/Users/espindola/llvm/test.cpp", directory: "/Users/espindola/llvm/build")
+!6 = !MDSubroutineType(types: !7)
+!7 = !{null, !8}
+!8 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !MDSubprogram(name: "bar_d", linkageName: "bar", scope: !5, file: !5, line: 3, type: !6, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @bar, variables: !2)
+!10 = !{!11}
+!11 = !MDGlobalVariable(name: "zed", scope: !0, file: !5, line: 6, type: !12, isLocal: false, isDefinition: true, variable: [1 x void ()*]* @zed)
+!12 = !MDCompositeType(tag: DW_TAG_array_type, baseType: !13, size: 64, align: 64, elements: !15)
+!13 = !MDDerivedType(tag: DW_TAG_typedef, name: "vifunc", file: !5, line: 5, baseType: !14)
+!14 = !MDDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64, align: 64)
+!15 = !{!16}
+!16 = !MDSubrange(count: 1)
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/arguments.ll b/test/DebugInfo/X86/arguments.ll
index 1b33f4e..4632711 100644
--- a/test/DebugInfo/X86/arguments.ll
+++ b/test/DebugInfo/X86/arguments.ll
@@ -61,13 +61,12 @@ attributes #1 = { nounwind readnone }
!9 = !{!10, !12}
!10 = !MDDerivedType(tag: DW_TAG_member, name: "i", line: 3, size: 32, align: 32, file: !1, scope: !8, baseType: !11)
!11 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!12 = !MDSubprogram(name: "foo", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !8, type: !13, variables: !18)
+!12 = !MDSubprogram(name: "foo", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !8, type: !13)
!13 = !MDSubroutineType(types: !14)
!14 = !{null, !15, !16}
!15 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !8)
!16 = !MDDerivedType(tag: DW_TAG_reference_type, baseType: !17)
!17 = !MDDerivedType(tag: DW_TAG_const_type, baseType: !8)
-!18 = !{i32 786468}
!19 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "f", line: 6, arg: 1, scope: !4, file: !5, type: !8)
!20 = !MDLocation(line: 6, scope: !4)
!21 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "g", line: 6, arg: 2, scope: !4, file: !5, type: !8)
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
index f78915f..314879b 100644
--- a/test/DebugInfo/X86/block-capture.ll
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -45,7 +45,7 @@ entry:
%3 = bitcast %struct.__block_literal_generic* %block.literal to i8*, !dbg !71
%4 = load i8*, i8** %2, !dbg !71
%5 = bitcast i8* %4 to void (i8*, ...)*, !dbg !71
- call void (i8*, ...)* %5(i8* %3), !dbg !71
+ call void (i8*, ...) %5(i8* %3), !dbg !71
ret void, !dbg !73
}
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index db4c9b8..9895324 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -98,7 +98,7 @@ declare void @_Z8moz_freePv(i8*)
!23 = !MDSubprogram(name: "~nsAutoRefCnt", linkageName: "_ZN17nsAutoRefCntD1Ev", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !6, scope: null, type: !16, function: void ()* @_ZN17nsAutoRefCntD1Ev, declaration: !15, variables: !24)
!24 = !{!26}
!26 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 18, arg: 1, flags: DIFlagArtificial, scope: !23, file: !6, type: !10)
-!27 = !MDSubprogram(name: "~nsAutoRefCnt", linkageName: "_ZN17nsAutoRefCntD2Ev", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !6, scope: null, type: !16, function: i32* null, declaration: !15, variables: !28)
+!27 = !MDSubprogram(name: "~nsAutoRefCnt", linkageName: "_ZN17nsAutoRefCntD2Ev", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !6, scope: null, type: !16, function: i32 ()* null, declaration: !15, variables: !28)
!28 = !{!30}
!30 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 18, arg: 1, flags: DIFlagArtificial, scope: !27, file: !6, type: !10)
!31 = !MDSubprogram(name: "operator=", linkageName: "_ZN12nsAutoRefCntaSEi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !6, scope: null, type: !32, declaration: !36, variables: !43)
diff --git a/test/DebugInfo/X86/cu-ranges-odr.ll b/test/DebugInfo/X86/cu-ranges-odr.ll
index 70d99db..182b1fd 100644
--- a/test/DebugInfo/X86/cu-ranges-odr.ll
+++ b/test/DebugInfo/X86/cu-ranges-odr.ll
@@ -69,11 +69,10 @@ attributes #1 = { nounwind readnone }
!5 = !{!6, !8}
!6 = !MDDerivedType(tag: DW_TAG_member, name: "a", line: 5, size: 32, align: 32, flags: DIFlagPrivate, file: !1, scope: !"_ZTS1A", baseType: !7)
!7 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !MDSubprogram(name: "A", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !"_ZTS1A", type: !9, variables: !12)
+!8 = !MDSubprogram(name: "A", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !"_ZTS1A", type: !9)
!9 = !MDSubroutineType(types: !10)
!10 = !{null, !11, !7}
!11 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
-!12 = !{i32 786468}
!13 = !{!14, !18, !19}
!14 = !MDSubprogram(name: "__cxx_global_var_init", line: 8, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !1, scope: !15, type: !16, function: void ()* @__cxx_global_var_init, variables: !2)
!15 = !MDFile(filename: "baz.cpp", directory: "/usr/local/google/home/echristo/tmp")
diff --git a/test/DebugInfo/X86/dbg-byval-parameter.ll b/test/DebugInfo/X86/dbg-byval-parameter.ll
index ffab194..c30e977 100644
--- a/test/DebugInfo/X86/dbg-byval-parameter.ll
+++ b/test/DebugInfo/X86/dbg-byval-parameter.ll
@@ -48,5 +48,5 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!17 = distinct !MDLexicalBlock(line: 11, column: 0, file: !19, scope: !1)
!18 = !{!1}
!19 = !MDFile(filename: "b2.c", directory: "/tmp/")
-!20 = !{i32 0}
+!20 = !{}
!21 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/dbg-const-int.ll b/test/DebugInfo/X86/dbg-const-int.ll
index 8aa4e1c..abe4e94 100644
--- a/test/DebugInfo/X86/dbg-const-int.ll
+++ b/test/DebugInfo/X86/dbg-const-int.ll
@@ -35,5 +35,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!11 = !{!1}
!12 = !{!6}
!13 = !MDFile(filename: "a.c", directory: "/private/tmp")
-!14 = !{i32 0}
+!14 = !{}
!15 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/dbg-const.ll b/test/DebugInfo/X86/dbg-const.ll
index a0f9efe..fd20c3c 100644
--- a/test/DebugInfo/X86/dbg-const.ll
+++ b/test/DebugInfo/X86/dbg-const.ll
@@ -47,5 +47,5 @@ declare i32 @bar() nounwind readnone
!13 = !{!0}
!14 = !{!6}
!15 = !MDFile(filename: "mu.c", directory: "/private/tmp")
-!16 = !{i32 0}
+!16 = !{}
!17 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/dbg-declare-arg.ll b/test/DebugInfo/X86/dbg-declare-arg.ll
index f6670e9..a3a61e5 100644
--- a/test/DebugInfo/X86/dbg-declare-arg.ll
+++ b/test/DebugInfo/X86/dbg-declare-arg.ll
@@ -74,7 +74,7 @@ entry:
!0 = !MDSubprogram(name: "~A", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !51, scope: !1, type: !11)
!1 = !MDCompositeType(tag: DW_TAG_class_type, name: "A", line: 2, size: 128, align: 32, file: !51, scope: !2, elements: !4)
-!2 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 130127)", isOptimized: false, emissionKind: 1, file: !51, enums: !24, retainedTypes: !24, subprograms: !50)
+!2 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 130127)", isOptimized: false, emissionKind: 1, file: !51, enums: !{}, retainedTypes: !{}, subprograms: !50)
!3 = !MDFile(filename: "a.cc", directory: "/private/tmp")
!4 = !{!5, !7, !8, !9, !0, !10, !14}
!5 = !MDDerivedType(tag: DW_TAG_member, name: "x", line: 2, size: 32, align: 32, file: !51, scope: !3, baseType: !6)
@@ -85,12 +85,12 @@ entry:
!10 = !MDSubprogram(name: "A", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, file: !51, scope: !1, type: !11)
!11 = !MDSubroutineType(types: !12)
!12 = !{null, !13}
-!13 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !2, baseType: !1)
+!13 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !3, baseType: !1)
!14 = !MDSubprogram(name: "A", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, file: !51, scope: !1, type: !15)
!15 = !MDSubroutineType(types: !16)
!16 = !{null, !13, !17}
!17 = !MDDerivedType(tag: DW_TAG_reference_type, scope: !2, baseType: !18)
-!18 = !MDDerivedType(tag: DW_TAG_const_type, file: !2, baseType: !1)
+!18 = !MDDerivedType(tag: DW_TAG_const_type, file: !3, baseType: !1)
!19 = !MDSubprogram(name: "foo", linkageName: "_Z3fooi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !51, scope: !3, type: !20, function: void (%class.A*, i32)* @_Z3fooi)
!20 = !MDSubroutineType(types: !21)
!21 = !{!1}
@@ -110,7 +110,7 @@ entry:
!35 = !MDLocation(line: 8, column: 3, scope: !34)
!36 = !MDLocation(line: 9, column: 9, scope: !29)
!37 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "my_a", line: 9, scope: !29, file: !3, type: !38)
-!38 = !MDDerivedType(tag: DW_TAG_reference_type, file: !2, baseType: !1)
+!38 = !MDDerivedType(tag: DW_TAG_reference_type, file: !3, baseType: !1)
!39 = !MDLocation(line: 9, column: 5, scope: !29)
!40 = !MDLocation(line: 10, column: 3, scope: !29)
!41 = !MDLocation(line: 11, column: 3, scope: !29)
diff --git a/test/DebugInfo/X86/dbg-file-name.ll b/test/DebugInfo/X86/dbg-file-name.ll
index 42f015a..7ded7c4 100644
--- a/test/DebugInfo/X86/dbg-file-name.ll
+++ b/test/DebugInfo/X86/dbg-file-name.ll
@@ -20,5 +20,5 @@ define i32 @main() nounwind {
!8 = !{!5}
!9 = !{!6}
!10 = !MDFile(filename: "simple.c", directory: "/Users/manav/one/two")
-!11 = !{i32 0}
+!11 = !{}
!12 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/dbg-i128-const.ll b/test/DebugInfo/X86/dbg-i128-const.ll
index d172f01..1b89188 100644
--- a/test/DebugInfo/X86/dbg-i128-const.ll
+++ b/test/DebugInfo/X86/dbg-i128-const.ll
@@ -31,5 +31,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!12 = !{!3}
!13 = !MDFile(filename: "foo.c", directory: "/tmp")
!14 = !MDFile(filename: "myint.h", directory: "/tmp")
-!15 = !{i32 0}
+!15 = !{}
!16 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/dbg-merge-loc-entry.ll b/test/DebugInfo/X86/dbg-merge-loc-entry.ll
index fda8f28..b46fe2b 100644
--- a/test/DebugInfo/X86/dbg-merge-loc-entry.ll
+++ b/test/DebugInfo/X86/dbg-merge-loc-entry.ll
@@ -71,5 +71,5 @@ declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
!28 = !{!0, !9}
!29 = !MDFile(filename: "foobar.c", directory: "/tmp")
!30 = !MDFile(filename: "foobar.h", directory: "/tmp")
-!31 = !{i32 0}
+!31 = !{}
!32 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/dbg-prolog-end.ll b/test/DebugInfo/X86/dbg-prolog-end.ll
index 3467c3f..54c2434 100644
--- a/test/DebugInfo/X86/dbg-prolog-end.ll
+++ b/test/DebugInfo/X86/dbg-prolog-end.ll
@@ -55,5 +55,5 @@ entry:
!16 = !MDLocation(line: 8, column: 2, scope: !17)
!17 = distinct !MDLexicalBlock(line: 7, column: 12, file: !19, scope: !6)
!19 = !MDFile(filename: "/tmp/a.c", directory: "/private/tmp")
-!20 = !{i32 0}
+!20 = !{}
!21 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/dbg-value-const-byref.ll b/test/DebugInfo/X86/dbg-value-const-byref.ll
index f8453a5..766b72e 100644
--- a/test/DebugInfo/X86/dbg-value-const-byref.ll
+++ b/test/DebugInfo/X86/dbg-value-const-byref.ll
@@ -53,7 +53,7 @@ entry:
call void @llvm.dbg.value(metadata i32 3, i64 0, metadata !10, metadata !MDExpression()), !dbg !15
%call = call i32 @f3(i32 3) #3, !dbg !16
call void @llvm.dbg.value(metadata i32 7, i64 0, metadata !10, metadata !MDExpression()), !dbg !18
- %call1 = call i32 (...)* @f1() #3, !dbg !19
+ %call1 = call i32 (...) @f1() #3, !dbg !19
call void @llvm.dbg.value(metadata i32 %call1, i64 0, metadata !10, metadata !MDExpression()), !dbg !19
store i32 %call1, i32* %i, align 4, !dbg !19, !tbaa !20
call void @llvm.dbg.value(metadata i32* %i, i64 0, metadata !10, metadata !MDExpression()), !dbg !24
diff --git a/test/DebugInfo/X86/dbg-value-dag-combine.ll b/test/DebugInfo/X86/dbg-value-dag-combine.ll
index 2f4e83d..7370e8d 100644
--- a/test/DebugInfo/X86/dbg-value-dag-combine.ll
+++ b/test/DebugInfo/X86/dbg-value-dag-combine.ll
@@ -36,7 +36,7 @@ entry:
!9 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "gid", line: 3, scope: !10, file: !1, type: !6)
!10 = distinct !MDLexicalBlock(line: 2, column: 1, file: !19, scope: !0)
!11 = !MDLocation(line: 3, column: 41, scope: !10)
-!12 = !{i32 0}
+!12 = !{}
!13 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "idx", line: 4, scope: !10, file: !1, type: !6)
!14 = !MDLocation(line: 4, column: 20, scope: !10)
!15 = !MDLocation(line: 5, column: 15, scope: !10)
diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
index 0ba5f62..df650b7 100644
--- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
+++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
@@ -76,14 +76,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!0 = !MDSubprogram(name: "foo", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !1, scope: !1, type: !3, function: i32 (%struct.S1*, i32)* @foo, variables: !41)
!1 = !MDFile(filename: "nm2.c", directory: "/private/tmp")
-!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 125693)", isOptimized: true, emissionKind: 1, file: !42, enums: !8, retainedTypes: !8, subprograms: !39, globals: !40, imports: !44)
+!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 125693)", isOptimized: true, emissionKind: 1, file: !42, enums: !{}, retainedTypes: !{}, subprograms: !39, globals: !40, imports: !44)
!3 = !MDSubroutineType(types: !4)
!4 = !{!5}
!5 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!6 = !MDSubprogram(name: "foobar", line: 15, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: !1, type: !7, function: void ()* @foobar)
!7 = !MDSubroutineType(types: !8)
!8 = !{null}
-!9 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "sp", line: 7, arg: 1, scope: !0, file: !1, type: !10, inlinedAt: !32)
+!9 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "sp", line: 7, arg: 1, scope: !0, file: !1, type: !10)
!10 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !2, baseType: !11)
!11 = !MDDerivedType(tag: DW_TAG_typedef, name: "S1", line: 4, file: !42, scope: !2, baseType: !12)
!12 = !MDCompositeType(tag: DW_TAG_structure_type, name: "S1", line: 1, size: 128, align: 64, file: !42, scope: !2, elements: !13)
@@ -92,7 +92,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!15 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !2, baseType: !16)
!16 = !MDBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!17 = !MDDerivedType(tag: DW_TAG_member, name: "nums", line: 3, size: 32, align: 32, offset: 64, file: !42, scope: !1, baseType: !5)
-!18 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "nums", line: 7, arg: 2, scope: !0, file: !1, type: !5, inlinedAt: !32)
+!18 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "nums", line: 7, arg: 2, scope: !0, file: !1, type: !5)
!19 = !MDGlobalVariable(name: "p", line: 14, isLocal: false, isDefinition: true, scope: !2, file: !1, type: !11, variable: %struct.S1* @p)
!20 = !MDLocation(line: 7, column: 13, scope: !0)
!21 = !MDLocation(line: 7, column: 21, scope: !0)
diff --git a/test/DebugInfo/X86/dbg-value-isel.ll b/test/DebugInfo/X86/dbg-value-isel.ll
index 5c000f7..823cacd 100644
--- a/test/DebugInfo/X86/dbg-value-isel.ll
+++ b/test/DebugInfo/X86/dbg-value-isel.ll
@@ -102,5 +102,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!18 = !MDLocation(line: 10, column: 1, scope: !0)
!19 = !{!0}
!20 = !MDFile(filename: "OCLlLwTXZ.cl", directory: "/tmp")
-!21 = !{i32 0}
+!21 = !{}
!22 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/dbg-value-location.ll b/test/DebugInfo/X86/dbg-value-location.ll
index d852ee3..2a8a91c 100644
--- a/test/DebugInfo/X86/dbg-value-location.ll
+++ b/test/DebugInfo/X86/dbg-value-location.ll
@@ -75,5 +75,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!25 = !MDFile(filename: "f.i", directory: "/tmp")
!26 = !MDFile(filename: "/tmp/f.c", directory: "/tmp")
!27 = !MDFile(filename: "f.i", directory: "/tmp")
-!28 = !{i32 0}
+!28 = !{}
!29 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/dbg-value-range.ll b/test/DebugInfo/X86/dbg-value-range.ll
index a49b503..df1ab7b 100644
--- a/test/DebugInfo/X86/dbg-value-range.ll
+++ b/test/DebugInfo/X86/dbg-value-range.ll
@@ -8,7 +8,7 @@ entry:
%tmp1 = getelementptr inbounds %struct.a, %struct.a* %b, i64 0, i32 0, !dbg !14
%tmp2 = load i32, i32* %tmp1, align 4, !dbg !14
tail call void @llvm.dbg.value(metadata i32 %tmp2, i64 0, metadata !11, metadata !MDExpression()), !dbg !14
- %call = tail call i32 (...)* @foo(i32 %tmp2) nounwind , !dbg !18
+ %call = tail call i32 (...) @foo(i32 %tmp2) nounwind , !dbg !18
%add = add nsw i32 %tmp2, 1, !dbg !19
ret i32 %add, !dbg !19
}
@@ -40,7 +40,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!20 = !{!0}
!21 = !{!6, !11}
!22 = !MDFile(filename: "bar.c", directory: "/private/tmp")
-!23 = !{i32 0}
+!23 = !{}
; Check that variable bar:b value range is appropriately truncated in debug info.
; The variable is in %rdi which is clobbered by 'movl %ebx, %edi'
diff --git a/test/DebugInfo/X86/dbg-value-terminator.ll b/test/DebugInfo/X86/dbg-value-terminator.ll
index 2cea3f9..24841ab 100644
--- a/test/DebugInfo/X86/dbg-value-terminator.ll
+++ b/test/DebugInfo/X86/dbg-value-terminator.ll
@@ -129,5 +129,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!18 = !{!1}
!19 = !{!6, !7, !10}
!20 = !MDFile(filename: "a.c", directory: "/private/tmp")
-!21 = !{i32 0}
+!21 = !{}
!22 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/dbg_value_direct.ll b/test/DebugInfo/X86/dbg_value_direct.ll
index 890e9cd..22c5ce5 100644
--- a/test/DebugInfo/X86/dbg_value_direct.ll
+++ b/test/DebugInfo/X86/dbg_value_direct.ll
@@ -53,7 +53,7 @@ entry:
%19 = inttoptr i64 %18 to i8*
%20 = load i8, i8* %19
%21 = icmp ne i8 %20, 0
- call void @llvm.dbg.declare(metadata i32* %3, metadata !23, metadata !28)
+ call void @llvm.dbg.declare(metadata i32* %3, metadata !23, metadata !28), !dbg !MDLocation(scope: !4)
br i1 %21, label %22, label %28
; <label>:22 ; preds = %entry
@@ -157,17 +157,15 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!7 = !{!8, !21}
!8 = !MDCompositeType(tag: DW_TAG_structure_type, name: "A", line: 1, size: 8, align: 8, file: !1, elements: !9)
!9 = !{!10, !15}
-!10 = !MDSubprogram(name: "A", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !8, type: !11, variables: !14)
+!10 = !MDSubprogram(name: "A", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !8, type: !11)
!11 = !MDSubroutineType(types: !12)
!12 = !{null, !13}
!13 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !8)
-!14 = !{i32 786468}
-!15 = !MDSubprogram(name: "A", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !8, type: !16, variables: !20)
+!15 = !MDSubprogram(name: "A", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !8, type: !16)
!16 = !MDSubroutineType(types: !17)
!17 = !{null, !13, !18}
!18 = !MDDerivedType(tag: DW_TAG_reference_type, baseType: !19)
!19 = !MDDerivedType(tag: DW_TAG_const_type, baseType: !8)
-!20 = !{i32 786468}
!21 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!22 = !{i32 2, !"Dwarf Version", i32 3}
!23 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "", line: 6, arg: 1, scope: !4, file: !5, type: !21)
diff --git a/test/DebugInfo/X86/debug-loc-asan.ll b/test/DebugInfo/X86/debug-loc-asan.ll
index b15668a..a30c13f 100644
--- a/test/DebugInfo/X86/debug-loc-asan.ll
+++ b/test/DebugInfo/X86/debug-loc-asan.ll
@@ -77,7 +77,7 @@ entry:
%21 = inttoptr i64 %20 to i8*
%22 = load i8, i8* %21
%23 = icmp ne i8 %22, 0
- call void @llvm.dbg.declare(metadata i32* %8, metadata !12, metadata !14)
+ call void @llvm.dbg.declare(metadata i32* %8, metadata !12, metadata !14), !dbg !MDLocation(scope: !4)
br i1 %23, label %24, label %30
; <label>:24 ; preds = %5
diff --git a/test/DebugInfo/X86/debug-ranges-offset.ll b/test/DebugInfo/X86/debug-ranges-offset.ll
index c779cea..55872db 100644
--- a/test/DebugInfo/X86/debug-ranges-offset.ll
+++ b/test/DebugInfo/X86/debug-ranges-offset.ll
@@ -234,7 +234,7 @@ attributes #4 = { builtin }
!29 = !MDLocation(line: 7, scope: !4)
!30 = !MDLocation(line: 4, scope: !4, inlinedAt: !31)
!31 = !MDLocation(line: 10, scope: !13)
-!32 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "p", line: 4, scope: !4, file: !5, type: !10, inlinedAt: !31)
+!32 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "p", line: 4, scope: !4, file: !5, type: !10)
!33 = !MDLocation(line: 5, scope: !21, inlinedAt: !31)
!34 = !MDLocation(line: 6, scope: !21, inlinedAt: !31)
!35 = !MDLocation(line: 7, scope: !4, inlinedAt: !31)
diff --git a/test/DebugInfo/X86/debug_frame.ll b/test/DebugInfo/X86/debug_frame.ll
index ca42993..49aea75 100644
--- a/test/DebugInfo/X86/debug_frame.ll
+++ b/test/DebugInfo/X86/debug_frame.ll
@@ -15,7 +15,7 @@ entry:
!0 = !MDSubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !6, scope: !1, type: !3, function: void ()* @f)
!1 = !MDFile(filename: "/home/espindola/llvm/test.c", directory: "/home/espindola/llvm/build")
-!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !6, enums: !4, retainedTypes: !4, subprograms: !5)
+!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !6, enums: !{}, retainedTypes: !{}, subprograms: !5)
!3 = !MDSubroutineType(types: !4)
!4 = !{null}
!6 = !MDFile(filename: "/home/espindola/llvm/test.c", directory: "/home/espindola/llvm/build")
diff --git a/test/DebugInfo/X86/deleted-bit-piece.ll b/test/DebugInfo/X86/deleted-bit-piece.ll
new file mode 100644
index 0000000..b8ae9b1
--- /dev/null
+++ b/test/DebugInfo/X86/deleted-bit-piece.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s | FileCheck %s
+; This is mainly a crasher for the revert in r234717. A debug info intrinsic
+; that gets deleted can't have its bit piece expression verified after it's
+; deleted.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK: __Z3foov:
+; CHECK: retq
+
+define void @_Z3foov() {
+entry:
+ br i1 undef, label %exit, label %bb
+
+bb: ; preds = %entry
+ call void @llvm.dbg.value(metadata i8* undef, i64 0, metadata !15, metadata !16), !dbg !17
+ br label %exit
+
+exit: ; preds = %bb, %entry
+ ret void
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+!llvm.module.flags = !{!0, !1}
+!llvm.dbg.cu = !{!2}
+
+!0 = !{i32 2, !"Dwarf Version", i32 2}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = !MDCompileUnit(language: DW_LANG_C_plus_plus, file: !3, isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !4, retainedTypes: !5, subprograms: !11, globals: !4, imports: !4)
+!3 = !MDFile(filename: "foo.cpp", directory: "/path/to/dir")
+!4 = !{}
+!5 = !{!6}
+!6 = !MDCompositeType(tag: DW_TAG_structure_type, name: "Class", size: 64, align: 64, elements: !7, identifier: "_ZT5Class")
+!7 = !{!8, !10}
+!8 = !MDDerivedType(tag: DW_TAG_member, name: "a", scope: !"_ZT5Class", baseType: !9, size: 32, align: 32)
+!9 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !MDDerivedType(tag: DW_TAG_member, name: "b", scope: !"_ZT5Class", baseType: !9, size: 32, align: 32)
+!11 = !{!12}
+!12 = !MDSubprogram(name: "foo", scope: null, file: !3, type: !13, isLocal: false, isDefinition: true, isOptimized: false, function: void ()* @_Z3foov)
+!13 = !MDSubroutineType(types: !14)
+!14 = !{null}
+!15 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "v", scope: !12, type: !"_ZT5Class")
+!16 = !MDExpression(DW_OP_bit_piece, 32, 32)
+!17 = !MDLocation(line: 2755, column: 9, scope: !12)
diff --git a/test/DebugInfo/X86/earlydup-crash.ll b/test/DebugInfo/X86/earlydup-crash.ll
index 81986f6..5525a3c 100644
--- a/test/DebugInfo/X86/earlydup-crash.ll
+++ b/test/DebugInfo/X86/earlydup-crash.ll
@@ -13,7 +13,7 @@ entry:
bb: ; preds = %entry
%tmp = icmp eq i32 undef, 0
%tmp1 = add i32 0, 11
- call void @llvm.dbg.value(metadata i32 %tmp1, i64 0, metadata !0, metadata !MDExpression())
+ call void @llvm.dbg.value(metadata i32 %tmp1, i64 0, metadata !0, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
br i1 undef, label %bb18, label %bb31.preheader
bb31.preheader: ; preds = %bb19, %bb
@@ -90,5 +90,5 @@ declare void @foobar(i32)
!43 = !MDFile(filename: "stddef.h", directory: "/Users/espindola/llvm/build-llvm-gcc/./prev-gcc/include")
!44 = !MDFile(filename: "darwin-c.c", directory: "/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/config")
!45 = !{!2}
-!46 = !{i32 0}
+!46 = !{}
!47 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/elf-names.ll b/test/DebugInfo/X86/elf-names.ll
index c56109d..4344a5b 100644
--- a/test/DebugInfo/X86/elf-names.ll
+++ b/test/DebugInfo/X86/elf-names.ll
@@ -77,16 +77,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!14 = !MDDerivedType(tag: DW_TAG_member, name: "c2", line: 7, size: 32, align: 32, offset: 32, flags: DIFlagPrivate, file: !53, scope: !10, baseType: !13)
!15 = !MDDerivedType(tag: DW_TAG_member, name: "c3", line: 8, size: 32, align: 32, offset: 64, flags: DIFlagPrivate, file: !53, scope: !10, baseType: !13)
!16 = !MDDerivedType(tag: DW_TAG_member, name: "c4", line: 9, size: 32, align: 32, offset: 96, flags: DIFlagPrivate, file: !53, scope: !10, baseType: !13)
-!17 = !MDSubprogram(name: "D", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !6, scope: !10, type: !7, variables: !18)
-!18 = !{!19}
-!19 = !{} ; previously: invalid DW_TAG_base_type
-!20 = !MDSubprogram(name: "D", line: 4, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !6, scope: !10, type: !21, variables: !25)
+!17 = !MDSubprogram(name: "D", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !6, scope: !10, type: !7)
+!20 = !MDSubprogram(name: "D", line: 4, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !6, scope: !10, type: !21)
!21 = !MDSubroutineType(types: !22)
!22 = !{null, !9, !23}
!23 = !MDDerivedType(tag: DW_TAG_reference_type, baseType: !24)
!24 = !MDDerivedType(tag: DW_TAG_const_type, baseType: !10)
-!25 = !{!26}
-!26 = !{} ; previously: invalid DW_TAG_base_type
!27 = !{!29}
!29 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 12, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !5, file: !6, type: !30)
!30 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !10)
diff --git a/test/DebugInfo/X86/empty-array.ll b/test/DebugInfo/X86/empty-array.ll
index 628234c..678738d 100644
--- a/test/DebugInfo/X86/empty-array.ll
+++ b/test/DebugInfo/X86/empty-array.ll
@@ -39,11 +39,9 @@
!11 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!12 = !{!13}
!13 = !MDSubrange(count: -1)
-!14 = !MDSubprogram(name: "A", line: 1, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !6, scope: !7, type: !15, variables: !18)
+!14 = !MDSubprogram(name: "A", line: 1, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !6, scope: !7, type: !15)
!15 = !MDSubroutineType(types: !16)
!16 = !{null, !17}
!17 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !7)
-!18 = !{!19}
-!19 = !{} ; previously: invalid DW_TAG_base_type
!20 = !MDFile(filename: "t.cpp", directory: "/Volumes/Sandbox/llvm")
!21 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/formal_parameter.ll b/test/DebugInfo/X86/formal_parameter.ll
index ba89ac4..97f45ec 100644
--- a/test/DebugInfo/X86/formal_parameter.ll
+++ b/test/DebugInfo/X86/formal_parameter.ll
@@ -29,7 +29,7 @@ entry:
%map.addr = alloca i32, align 4
store i32 %map, i32* %map.addr, align 4, !tbaa !15
call void @llvm.dbg.declare(metadata i32* %map.addr, metadata !10, metadata !MDExpression()), !dbg !14
- %call = call i32 (i32*, ...)* bitcast (i32 (...)* @lookup to i32 (i32*, ...)*)(i32* %map.addr) #3, !dbg !19
+ %call = call i32 (i32*, ...) bitcast (i32 (...)* @lookup to i32 (i32*, ...)*)(i32* %map.addr) #3, !dbg !19
; Ensure that all dbg intrinsics have the same scope after
; LowerDbgDeclare is finished with them.
;
@@ -37,7 +37,7 @@ entry:
; LOWERING: call void @llvm.dbg.value{{.*}}, !dbg ![[LOC]]
; LOWERING: call void @llvm.dbg.value{{.*}}, !dbg ![[LOC]]
%0 = load i32, i32* %map.addr, align 4, !dbg !20, !tbaa !15
- %call1 = call i32 (i32, ...)* bitcast (i32 (...)* @verify to i32 (i32, ...)*)(i32 %0) #3, !dbg !20
+ %call1 = call i32 (i32, ...) bitcast (i32 (...)* @verify to i32 (i32, ...)*)(i32 %0) #3, !dbg !20
ret void, !dbg !22
}
diff --git a/test/DebugInfo/X86/generate-odr-hash.ll b/test/DebugInfo/X86/generate-odr-hash.ll
index e26eb14..b5c100a 100644
--- a/test/DebugInfo/X86/generate-odr-hash.ll
+++ b/test/DebugInfo/X86/generate-odr-hash.ll
@@ -250,11 +250,10 @@ attributes #1 = { nounwind readnone }
!28 = !MDCompositeType(tag: DW_TAG_structure_type, name: "walrus", line: 24, size: 8, align: 8, file: !1, scope: !29, elements: !30)
!29 = !MDNamespace(line: 23, file: !1, scope: null)
!30 = !{!31}
-!31 = !MDSubprogram(name: "walrus", line: 25, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 25, file: !1, scope: !28, type: !32, variables: !35)
+!31 = !MDSubprogram(name: "walrus", line: 25, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 25, file: !1, scope: !28, type: !32)
!32 = !MDSubroutineType(types: !33)
!33 = !{null, !34}
!34 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !28)
-!35 = !{i32 786468}
!36 = !MDSubprogram(name: "", linkageName: "_GLOBAL__I_a", line: 25, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial, isOptimized: false, scopeLine: 25, file: !1, scope: !23, type: !37, function: void ()* @_GLOBAL__I_a, variables: !2)
!37 = !MDSubroutineType(types: !2)
!38 = !{!39, !40, !41, !42}
diff --git a/test/DebugInfo/X86/gnu-public-names.ll b/test/DebugInfo/X86/gnu-public-names.ll
index e7bb0a1..e57b3eb 100644
--- a/test/DebugInfo/X86/gnu-public-names.ll
+++ b/test/DebugInfo/X86/gnu-public-names.ll
@@ -31,10 +31,38 @@
; global_variable.member_function();
; }
; int global_namespace_variable = 1;
+; extern int global_namespace_variable_decl;
; struct D {
; int A;
; } d;
; }
+;
+; using ns::global_namespace_variable_decl;
+;
+; namespace {
+; int i;
+; }
+;
+; int *f3() {
+; static int z;
+; return &z;
+; }
+;
+; namespace {
+; namespace inner {
+; int b;
+; }
+; }
+;
+; namespace outer {
+; namespace {
+; int c;
+; }
+; }
+;
+; int f7() {
+; return i + *f3() + inner::b + outer::c;
+; }
; ASM: .section .debug_gnu_pubnames
; ASM: .byte 32 # Kind: VARIABLE, EXTERNAL
@@ -109,6 +137,25 @@
; CHECK: NULL
; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[ANON:.*]]: DW_TAG_namespace
+; CHECK-NOT: DW_AT_name
+; CHECK: [[ANON_I:.*]]: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "i"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[ANON_INNER:.*]]: DW_TAG_namespace
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "inner"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[ANON_INNER_B:.*]]: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "b"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: NULL
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: NULL
+; CHECK-NOT: {{DW_TAG|NULL}}
+
; CHECK: [[OUTER:.*]]: DW_TAG_namespace
; CHECK-NOT: DW_TAG
; CHECK: DW_AT_name {{.*}} "outer"
@@ -130,23 +177,7 @@
; CHECK: NULL
; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[ANON:.*]]: DW_TAG_namespace
-; CHECK-NOT: DW_AT_name
-; CHECK: [[ANON_INNER:.*]]: DW_TAG_namespace
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "inner"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[ANON_INNER_B:.*]]: DW_TAG_variable
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "b"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: NULL
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[ANON_I:.*]]: DW_TAG_variable
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "i"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: NULL
+; CHECK: DW_TAG_imported_declaration
; CHECK-NOT: {{DW_TAG|NULL}}
; CHECK: [[MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram
@@ -166,29 +197,28 @@
; CHECK-LABEL: .debug_gnu_pubnames contents:
; CHECK-NEXT: length = {{.*}} version = 0x0002 unit_offset = 0x00000000 unit_size = {{.*}}
; CHECK-NEXT: Offset Linkage Kind Name
-; CHECK-DAG: [[GLOBAL_FUNC]] EXTERNAL FUNCTION "global_function"
-; CHECK-DAG: [[NS]] EXTERNAL TYPE "ns"
-; CHECK-DAG: [[MEM_FUNC]] EXTERNAL FUNCTION "C::member_function"
-; CHECK-DAG: [[GLOB_VAR]] EXTERNAL VARIABLE "global_variable"
-; CHECK-DAG: [[GLOB_NS_VAR]] EXTERNAL VARIABLE "ns::global_namespace_variable"
-; CHECK-DAG: [[GLOB_NS_FUNC]] EXTERNAL FUNCTION "ns::global_namespace_function"
-; CHECK-DAG: [[D_VAR]] EXTERNAL VARIABLE "ns::d"
-; CHECK-DAG: [[STATIC_MEM_VAR]] EXTERNAL VARIABLE "C::static_member_variable"
-; CHECK-DAG: [[STATIC_MEM_FUNC]] EXTERNAL FUNCTION "C::static_member_function"
-; CHECK-DAG: [[ANON]] EXTERNAL TYPE "(anonymous namespace)"
-; CHECK-DAG: [[ANON_INNER]] EXTERNAL TYPE "(anonymous namespace)::inner"
-; CHECK-DAG: [[OUTER]] EXTERNAL TYPE "outer"
-; CHECK-DAG: [[OUTER_ANON]] EXTERNAL TYPE "outer::(anonymous namespace)"
-; CHECK-DAG: [[ANON_I]] STATIC VARIABLE "(anonymous namespace)::i"
-; CHECK-DAG: [[ANON_INNER_B]] STATIC VARIABLE "(anonymous namespace)::inner::b"
-; CHECK-DAG: [[OUTER_ANON_C]] STATIC VARIABLE "outer::(anonymous namespace)::c"
-
+; CHECK-NEXT: [[GLOBAL_FUNC]] EXTERNAL FUNCTION "global_function"
+; CHECK-NEXT: [[NS]] EXTERNAL TYPE "ns"
+; CHECK-NEXT: [[OUTER_ANON_C]] STATIC VARIABLE "outer::(anonymous namespace)::c"
+; CHECK-NEXT: [[ANON_I]] STATIC VARIABLE "(anonymous namespace)::i"
; GCC Doesn't put local statics in pubnames, but it seems not unreasonable and
; comes out naturally from LLVM's implementation, so I'm OK with it for now. If
; it's demonstrated that this is a major size concern or degrades debug info
; consumer behavior, feel free to change it.
+; CHECK-NEXT: [[F3_Z]] STATIC VARIABLE "f3::z"
+; CHECK-NEXT: [[ANON]] EXTERNAL TYPE "(anonymous namespace)"
+; CHECK-NEXT: [[OUTER_ANON]] EXTERNAL TYPE "outer::(anonymous namespace)"
+; CHECK-NEXT: [[ANON_INNER_B]] STATIC VARIABLE "(anonymous namespace)::inner::b"
+; CHECK-NEXT: [[OUTER]] EXTERNAL TYPE "outer"
+; CHECK-NEXT: [[MEM_FUNC]] EXTERNAL FUNCTION "C::member_function"
+; CHECK-NEXT: [[GLOB_VAR]] EXTERNAL VARIABLE "global_variable"
+; CHECK-NEXT: [[GLOB_NS_VAR]] EXTERNAL VARIABLE "ns::global_namespace_variable"
+; CHECK-NEXT: [[ANON_INNER]] EXTERNAL TYPE "(anonymous namespace)::inner"
+; CHECK-NEXT: [[D_VAR]] EXTERNAL VARIABLE "ns::d"
+; CHECK-NEXT: [[GLOB_NS_FUNC]] EXTERNAL FUNCTION "ns::global_namespace_function"
+; CHECK-NEXT: [[STATIC_MEM_VAR]] EXTERNAL VARIABLE "C::static_member_variable"
+; CHECK-NEXT: [[STATIC_MEM_FUNC]] EXTERNAL FUNCTION "C::static_member_function"
-; CHECK-DAG: [[F3_Z]] STATIC VARIABLE "f3::z"
; CHECK-LABEL: debug_gnu_pubtypes contents:
@@ -214,10 +244,10 @@ define void @_ZN1C15member_functionEv(%struct.C* %this) #0 align 2 {
entry:
%this.addr = alloca %struct.C*, align 8
store %struct.C* %this, %struct.C** %this.addr, align 8
- call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !50, metadata !MDExpression()), !dbg !52
+ call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !51, metadata !53), !dbg !54
%this1 = load %struct.C*, %struct.C** %this.addr
- store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !53
- ret void, !dbg !54
+ store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !55
+ ret void, !dbg !56
}
; Function Attrs: nounwind readnone
@@ -226,108 +256,110 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind uwtable
define i32 @_ZN1C22static_member_functionEv() #0 align 2 {
entry:
- %0 = load i32, i32* @_ZN1C22static_member_variableE, align 4, !dbg !55
- ret i32 %0, !dbg !55
+ %0 = load i32, i32* @_ZN1C22static_member_variableE, align 4, !dbg !57
+ ret i32 %0, !dbg !57
}
; Function Attrs: nounwind uwtable
define i32 @_Z15global_functionv() #0 {
entry:
- ret i32 -1, !dbg !56
+ ret i32 -1, !dbg !58
}
; Function Attrs: nounwind uwtable
define void @_ZN2ns25global_namespace_functionEv() #0 {
entry:
- call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !57
- ret void, !dbg !58
+ call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !59
+ ret void, !dbg !60
}
; Function Attrs: nounwind uwtable
define i32* @_Z2f3v() #0 {
entry:
- ret i32* @_ZZ2f3vE1z, !dbg !59
+ ret i32* @_ZZ2f3vE1z, !dbg !61
}
; Function Attrs: nounwind uwtable
define i32 @_Z2f7v() #0 {
entry:
- %0 = load i32, i32* @_ZN12_GLOBAL__N_11iE, align 4, !dbg !60
- %call = call i32* @_Z2f3v(), !dbg !60
- %1 = load i32, i32* %call, align 4, !dbg !60
- %add = add nsw i32 %0, %1, !dbg !60
- %2 = load i32, i32* @_ZN12_GLOBAL__N_15inner1bE, align 4, !dbg !60
- %add1 = add nsw i32 %add, %2, !dbg !60
- %3 = load i32, i32* @_ZN5outer12_GLOBAL__N_11cE, align 4, !dbg !60
- %add2 = add nsw i32 %add1, %3, !dbg !60
- ret i32 %add2, !dbg !60
+ %0 = load i32, i32* @_ZN12_GLOBAL__N_11iE, align 4, !dbg !62
+ %call = call i32* @_Z2f3v(), !dbg !62
+ %1 = load i32, i32* %call, align 4, !dbg !62
+ %add = add nsw i32 %0, %1, !dbg !62
+ %2 = load i32, i32* @_ZN12_GLOBAL__N_15inner1bE, align 4, !dbg !62
+ %add1 = add nsw i32 %add, %2, !dbg !62
+ %3 = load i32, i32* @_ZN5outer12_GLOBAL__N_11cE, align 4, !dbg !62
+ %add2 = add nsw i32 %add1, %3, !dbg !62
+ ret i32 %add2, !dbg !62
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!47, !48}
-!llvm.ident = !{!49}
+!llvm.module.flags = !{!48, !49}
+!llvm.ident = !{!50}
-!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !19, globals: !32, imports: !45)
-!1 = !MDFile(filename: "pubnames.cpp", directory: "/tmp/dbginfo")
+!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.7.0 (trunk 234897) (llvm/trunk 234911)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !19, globals: !31, imports: !44)
+!1 = !MDFile(filename: "gnu-public-names.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4, !15}
-!4 = !MDCompositeType(tag: DW_TAG_structure_type, name: "C", line: 1, size: 8, align: 8, file: !1, elements: !5, identifier: "_ZTS1C")
+!4 = !MDCompositeType(tag: DW_TAG_structure_type, name: "C", file: !1, line: 1, size: 8, align: 8, elements: !5, identifier: "_ZTS1C")
!5 = !{!6, !8, !12}
-!6 = !MDDerivedType(tag: DW_TAG_member, name: "static_member_variable", line: 4, flags: DIFlagStaticMember, file: !1, scope: !"_ZTS1C", baseType: !7)
-!7 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !MDSubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !"_ZTS1C", type: !9)
+!6 = !MDDerivedType(tag: DW_TAG_member, name: "static_member_variable", scope: !"_ZTS1C", file: !1, line: 4, baseType: !7, flags: DIFlagStaticMember)
+!7 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !MDSubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", scope: !"_ZTS1C", file: !1, line: 2, type: !9, isLocal: false, isDefinition: false, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false)
!9 = !MDSubroutineType(types: !10)
!10 = !{null, !11}
-!11 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1C")
-!12 = !MDSubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !"_ZTS1C", type: !13)
+!11 = !MDDerivedType(tag: DW_TAG_pointer_type, baseType: !"_ZTS1C", size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
+!12 = !MDSubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", scope: !"_ZTS1C", file: !1, line: 3, type: !13, isLocal: false, isDefinition: false, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false)
!13 = !MDSubroutineType(types: !14)
!14 = !{!7}
-!15 = !MDCompositeType(tag: DW_TAG_structure_type, name: "D", line: 28, size: 32, align: 32, file: !1, scope: !16, elements: !17, identifier: "_ZTSN2ns1DE")
-!16 = !MDNamespace(name: "ns", line: 23, file: !1, scope: null)
+!15 = !MDCompositeType(tag: DW_TAG_structure_type, name: "D", scope: !16, file: !1, line: 29, size: 32, align: 32, elements: !17, identifier: "_ZTSN2ns1DE")
+!16 = !MDNamespace(name: "ns", scope: null, file: !1, line: 23)
!17 = !{!18}
-!18 = !MDDerivedType(tag: DW_TAG_member, name: "A", line: 29, size: 32, align: 32, file: !1, scope: !"_ZTSN2ns1DE", baseType: !7)
-!19 = !{!20, !21, !22, !24, !27, !31}
-!20 = !MDSubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !1, scope: !"_ZTS1C", type: !9, function: void (%struct.C*)* @_ZN1C15member_functionEv, declaration: !8, variables: !2)
-!21 = !MDSubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !1, scope: !"_ZTS1C", type: !13, function: i32 ()* @_ZN1C22static_member_functionEv, declaration: !12, variables: !2)
-!22 = !MDSubprogram(name: "global_function", linkageName: "_Z15global_functionv", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 19, file: !1, scope: !23, type: !13, function: i32 ()* @_Z15global_functionv, variables: !2)
-!23 = !MDFile(filename: "pubnames.cpp", directory: "/tmp/dbginfo")
-!24 = !MDSubprogram(name: "global_namespace_function", linkageName: "_ZN2ns25global_namespace_functionEv", line: 24, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 24, file: !1, scope: !16, type: !25, function: void ()* @_ZN2ns25global_namespace_functionEv, variables: !2)
-!25 = !MDSubroutineType(types: !26)
-!26 = !{null}
-!27 = !MDSubprogram(name: "f3", linkageName: "_Z2f3v", line: 37, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 37, file: !1, scope: !23, type: !28, function: i32* ()* @_Z2f3v, variables: !2)
-!28 = !MDSubroutineType(types: !29)
-!29 = !{!30}
-!30 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !7)
-!31 = !MDSubprogram(name: "f7", linkageName: "_Z2f7v", line: 54, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 54, file: !1, scope: !23, type: !13, function: i32 ()* @_Z2f7v, variables: !2)
-!32 = !{!33, !34, !35, !36, !37, !38, !41, !44}
-!33 = !MDGlobalVariable(name: "static_member_variable", linkageName: "_ZN1C22static_member_variableE", line: 7, isLocal: false, isDefinition: true, scope: null, file: !23, type: !7, variable: i32* @_ZN1C22static_member_variableE, declaration: !6)
-!34 = !MDGlobalVariable(name: "global_variable", line: 17, isLocal: false, isDefinition: true, scope: null, file: !23, type: !"_ZTS1C", variable: %struct.C* @global_variable)
-!35 = !MDGlobalVariable(name: "global_namespace_variable", linkageName: "_ZN2ns25global_namespace_variableE", line: 27, isLocal: false, isDefinition: true, scope: !16, file: !23, type: !7, variable: i32* @_ZN2ns25global_namespace_variableE)
-!36 = !MDGlobalVariable(name: "d", linkageName: "_ZN2ns1dE", line: 30, isLocal: false, isDefinition: true, scope: !16, file: !23, type: !"_ZTSN2ns1DE", variable: %"struct.ns::D"* @_ZN2ns1dE)
-!37 = !MDGlobalVariable(name: "z", line: 38, isLocal: true, isDefinition: true, scope: !27, file: !23, type: !7, variable: i32* @_ZZ2f3vE1z)
-!38 = !MDGlobalVariable(name: "c", linkageName: "_ZN5outer12_GLOBAL__N_11cE", line: 50, isLocal: true, isDefinition: true, scope: !39, file: !23, type: !7, variable: i32* @_ZN5outer12_GLOBAL__N_11cE)
-!39 = !MDNamespace(line: 49, file: !1, scope: !40)
-!40 = !MDNamespace(name: "outer", line: 48, file: !1, scope: null)
-!41 = !MDGlobalVariable(name: "b", linkageName: "_ZN12_GLOBAL__N_15inner1bE", line: 44, isLocal: true, isDefinition: true, scope: !42, file: !23, type: !7, variable: i32* @_ZN12_GLOBAL__N_15inner1bE)
-!42 = !MDNamespace(name: "inner", line: 43, file: !1, scope: !43)
-!43 = !MDNamespace(line: 33, file: !1, scope: null)
-!44 = !MDGlobalVariable(name: "i", linkageName: "_ZN12_GLOBAL__N_11iE", line: 34, isLocal: true, isDefinition: true, scope: !43, file: !23, type: !7, variable: i32* @_ZN12_GLOBAL__N_11iE)
-!45 = !{!46}
-!46 = !MDImportedEntity(tag: DW_TAG_imported_module, line: 40, scope: !40, entity: !39)
-!47 = !{i32 2, !"Dwarf Version", i32 4}
-!48 = !{i32 2, !"Debug Info Version", i32 3}
-!49 = !{!"clang version 3.5.0 "}
-!50 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !20, type: !51)
-!51 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1C")
-!52 = !MDLocation(line: 0, scope: !20)
-!53 = !MDLocation(line: 10, scope: !20)
-!54 = !MDLocation(line: 11, scope: !20)
-!55 = !MDLocation(line: 14, scope: !21)
-!56 = !MDLocation(line: 20, scope: !22)
-!57 = !MDLocation(line: 25, scope: !24)
-!58 = !MDLocation(line: 26, scope: !24)
-!59 = !MDLocation(line: 39, scope: !27)
-!60 = !MDLocation(line: 55, scope: !31)
+!18 = !MDDerivedType(tag: DW_TAG_member, name: "A", scope: !"_ZTSN2ns1DE", file: !1, line: 30, baseType: !7, size: 32, align: 32)
+!19 = !{!20, !21, !22, !23, !26, !30}
+!20 = !MDSubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", scope: !"_ZTS1C", file: !1, line: 9, type: !9, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, function: void (%struct.C*)* @_ZN1C15member_functionEv, declaration: !8, variables: !2)
+!21 = !MDSubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", scope: !"_ZTS1C", file: !1, line: 13, type: !13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, function: i32 ()* @_ZN1C22static_member_functionEv, declaration: !12, variables: !2)
+!22 = !MDSubprogram(name: "global_function", linkageName: "_Z15global_functionv", scope: !1, file: !1, line: 19, type: !13, isLocal: false, isDefinition: true, scopeLine: 19, flags: DIFlagPrototyped, isOptimized: false, function: i32 ()* @_Z15global_functionv, variables: !2)
+!23 = !MDSubprogram(name: "global_namespace_function", linkageName: "_ZN2ns25global_namespace_functionEv", scope: !16, file: !1, line: 24, type: !24, isLocal: false, isDefinition: true, scopeLine: 24, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @_ZN2ns25global_namespace_functionEv, variables: !2)
+!24 = !MDSubroutineType(types: !25)
+!25 = !{null}
+!26 = !MDSubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 40, type: !27, isLocal: false, isDefinition: true, scopeLine: 40, flags: DIFlagPrototyped, isOptimized: false, function: i32* ()* @_Z2f3v, variables: !2)
+!27 = !MDSubroutineType(types: !28)
+!28 = !{!29}
+!29 = !MDDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64, align: 64)
+!30 = !MDSubprogram(name: "f7", linkageName: "_Z2f7v", scope: !1, file: !1, line: 57, type: !13, isLocal: false, isDefinition: true, scopeLine: 57, flags: DIFlagPrototyped, isOptimized: false, function: i32 ()* @_Z2f7v, variables: !2)
+!31 = !{!32, !33, !34, !35, !36, !37, !39, !41}
+!32 = !MDGlobalVariable(name: "static_member_variable", linkageName: "_ZN1C22static_member_variableE", scope: !0, file: !1, line: 7, type: !7, isLocal: false, isDefinition: true, variable: i32* @_ZN1C22static_member_variableE, declaration: !6)
+!33 = !MDGlobalVariable(name: "global_variable", scope: !0, file: !1, line: 17, type: !"_ZTS1C", isLocal: false, isDefinition: true, variable: %struct.C* @global_variable)
+!34 = !MDGlobalVariable(name: "global_namespace_variable", linkageName: "_ZN2ns25global_namespace_variableE", scope: !16, file: !1, line: 27, type: !7, isLocal: false, isDefinition: true, variable: i32* @_ZN2ns25global_namespace_variableE)
+!35 = !MDGlobalVariable(name: "d", linkageName: "_ZN2ns1dE", scope: !16, file: !1, line: 31, type: !"_ZTSN2ns1DE", isLocal: false, isDefinition: true, variable: %"struct.ns::D"* @_ZN2ns1dE)
+!36 = !MDGlobalVariable(name: "z", scope: !26, file: !1, line: 41, type: !7, isLocal: true, isDefinition: true, variable: i32* @_ZZ2f3vE1z)
+!37 = !MDGlobalVariable(name: "i", linkageName: "_ZN12_GLOBAL__N_11iE", scope: !38, file: !1, line: 37, type: !7, isLocal: true, isDefinition: true, variable: i32* @_ZN12_GLOBAL__N_11iE)
+!38 = !MDNamespace(scope: null, file: !1, line: 36)
+!39 = !MDGlobalVariable(name: "b", linkageName: "_ZN12_GLOBAL__N_15inner1bE", scope: !40, file: !1, line: 47, type: !7, isLocal: true, isDefinition: true, variable: i32* @_ZN12_GLOBAL__N_15inner1bE)
+!40 = !MDNamespace(name: "inner", scope: !38, file: !1, line: 46)
+!41 = !MDGlobalVariable(name: "c", linkageName: "_ZN5outer12_GLOBAL__N_11cE", scope: !42, file: !1, line: 53, type: !7, isLocal: true, isDefinition: true, variable: i32* @_ZN5outer12_GLOBAL__N_11cE)
+!42 = !MDNamespace(scope: !43, file: !1, line: 52)
+!43 = !MDNamespace(name: "outer", scope: null, file: !1, line: 51)
+!44 = !{!45, !47}
+!45 = !MDImportedEntity(tag: DW_TAG_imported_declaration, scope: !0, entity: !46, line: 34)
+!46 = !MDGlobalVariable(name: "global_namespace_variable_decl", linkageName: "_ZN2ns30global_namespace_variable_declE", scope: !16, file: !1, line: 28, type: !7, isLocal: false, isDefinition: false)
+!47 = !MDImportedEntity(tag: DW_TAG_imported_module, scope: !43, entity: !42, line: 43)
+!48 = !{i32 2, !"Dwarf Version", i32 4}
+!49 = !{i32 2, !"Debug Info Version", i32 3}
+!50 = !{!"clang version 3.7.0 (trunk 234897) (llvm/trunk 234911)"}
+!51 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, scope: !20, type: !52, flags: DIFlagArtificial | DIFlagObjectPointer)
+!52 = !MDDerivedType(tag: DW_TAG_pointer_type, baseType: !"_ZTS1C", size: 64, align: 64)
+!53 = !MDExpression()
+!54 = !MDLocation(line: 0, scope: !20)
+!55 = !MDLocation(line: 10, scope: !20)
+!56 = !MDLocation(line: 11, scope: !20)
+!57 = !MDLocation(line: 14, scope: !21)
+!58 = !MDLocation(line: 20, scope: !22)
+!59 = !MDLocation(line: 25, scope: !23)
+!60 = !MDLocation(line: 26, scope: !23)
+!61 = !MDLocation(line: 42, scope: !26)
+!62 = !MDLocation(line: 58, scope: !30)
diff --git a/test/DebugInfo/X86/inline-member-function.ll b/test/DebugInfo/X86/inline-member-function.ll
index e5a4ff0..c1a367e 100644
--- a/test/DebugInfo/X86/inline-member-function.ll
+++ b/test/DebugInfo/X86/inline-member-function.ll
@@ -71,12 +71,11 @@ attributes #1 = { nounwind readnone }
!3 = !{!4}
!4 = !MDCompositeType(tag: DW_TAG_structure_type, name: "foo", line: 1, size: 8, align: 8, file: !1, elements: !5, identifier: "_ZTS3foo")
!5 = !{!6}
-!6 = !MDSubprogram(name: "func", linkageName: "_ZN3foo4funcEi", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !"_ZTS3foo", type: !7, variables: !11)
+!6 = !MDSubprogram(name: "func", linkageName: "_ZN3foo4funcEi", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !"_ZTS3foo", type: !7)
!7 = !MDSubroutineType(types: !8)
!8 = !{!9, !10, !9}
!9 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!10 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS3foo")
-!11 = !{i32 786468}
!12 = !{!13, !17}
!13 = !MDSubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !14, type: !15, function: i32 ()* @main, variables: !2)
!14 = !MDFile(filename: "inline.cpp", directory: "/tmp/dbginfo")
diff --git a/test/DebugInfo/X86/inlined-formal-parameter.ll b/test/DebugInfo/X86/inlined-formal-parameter.ll
new file mode 100644
index 0000000..a2ae0df
--- /dev/null
+++ b/test/DebugInfo/X86/inlined-formal-parameter.ll
@@ -0,0 +1,75 @@
+; RUN: llc -filetype=obj -o %t.o %s
+; RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s
+
+; Testcase generated using 'clang -g -O2 -S -emit-llvm' from the following:
+;; void sink(void);
+;; static __attribute__((always_inline)) void bar(int a) { sink(); }
+;; void foo(void) {
+;; bar(0);
+;; bar(0);
+;; }
+
+; Check that we have formal parameters for 'a' in both inlined subroutines.
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} "bar"
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_const_value
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} "a"
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} "bar"
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_const_value
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} "a"
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+; Function Attrs: nounwind ssp uwtable
+define void @foo() #0 {
+entry:
+ tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !12, metadata !17) #3, !dbg !18
+ tail call void @sink() #3, !dbg !20
+ tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !12, metadata !17) #3, !dbg !21
+ tail call void @sink() #3, !dbg !23
+ ret void, !dbg !24
+}
+
+declare void @sink() #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14, !15}
+!llvm.ident = !{!16}
+
+!0 = !MDCompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 235110) (llvm/trunk 235108)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !MDFile(filename: "t.c", directory: "/path/to/dir")
+!2 = !{}
+!3 = !{!4, !7}
+!4 = !MDSubprogram(name: "foo", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, function: void ()* @foo, variables: !2)
+!5 = !MDSubroutineType(types: !6)
+!6 = !{null}
+!7 = !MDSubprogram(name: "bar", scope: !1, file: !1, line: 2, type: !8, isLocal: true, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !11)
+!8 = !MDSubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!11 = !{!12}
+!12 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "a", arg: 1, scope: !7, file: !1, line: 2, type: !10)
+!13 = !{i32 2, !"Dwarf Version", i32 2}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{i32 1, !"PIC Level", i32 2}
+!16 = !{!"clang version 3.7.0 (trunk 235110) (llvm/trunk 235108)"}
+!17 = !MDExpression()
+!18 = !MDLocation(line: 2, column: 52, scope: !7, inlinedAt: !19)
+!19 = distinct !MDLocation(line: 4, column: 3, scope: !4)
+!20 = !MDLocation(line: 2, column: 57, scope: !7, inlinedAt: !19)
+!21 = !MDLocation(line: 2, column: 52, scope: !7, inlinedAt: !22)
+!22 = distinct !MDLocation(line: 5, column: 3, scope: !4)
+!23 = !MDLocation(line: 2, column: 57, scope: !7, inlinedAt: !22)
+!24 = !MDLocation(line: 6, column: 1, scope: !4)
diff --git a/test/DebugInfo/X86/mi-print.ll b/test/DebugInfo/X86/mi-print.ll
new file mode 100644
index 0000000..26dd0cb
--- /dev/null
+++ b/test/DebugInfo/X86/mi-print.ll
@@ -0,0 +1,56 @@
+; RUN: llc -debug -mtriple x86_64-apple-darwin < %s -o /dev/null 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; Check that llc -debug actually prints variables and locations, rather than
+; crashing.
+
+; CHECK: DBG_VALUE
+
+; Generated using `clang -g -O2 -S -emit-llvm -g` on the following source:
+;
+; static int foo(int x) { return x; }
+; int bar(int x) { return foo(x); }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Function Attrs: nounwind readnone ssp uwtable
+define i32 @bar(i32 %x) #0 {
+entry:
+ tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !9, metadata !17), !dbg !18
+ tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !19, metadata !17), !dbg !21
+ ret i32 %x, !dbg !22
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14, !15}
+!llvm.ident = !{!16}
+
+!0 = !MDCompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 233919) (llvm/trunk 233920)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !MDFile(filename: "t.c", directory: "/Users/dexonsmith/data/llvm/debug-info/test/DebugInfo/X86")
+!2 = !{}
+!3 = !{!4, !10}
+!4 = !MDSubprogram(name: "bar", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, function: i32 (i32)* @bar, variables: !8)
+!5 = !MDSubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{!9}
+!9 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1, scope: !4, file: !1, line: 2, type: !7)
+!10 = !MDSubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: true, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !11)
+!11 = !{!12}
+!12 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1, scope: !10, file: !1, line: 1, type: !7)
+!13 = !{i32 2, !"Dwarf Version", i32 2}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{i32 1, !"PIC Level", i32 2}
+!16 = !{!"clang version 3.7.0 (trunk 233919) (llvm/trunk 233920)"}
+!17 = !MDExpression()
+!18 = !MDLocation(line: 2, column: 13, scope: !4)
+!19 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1, scope: !10, file: !1, line: 1, type: !7)
+!20 = distinct !MDLocation(line: 2, column: 25, scope: !4)
+!21 = !MDLocation(line: 1, column: 20, scope: !10, inlinedAt: !20)
+!22 = !MDLocation(line: 2, column: 18, scope: !4)
diff --git a/test/DebugInfo/X86/nodebug_with_debug_loc.ll b/test/DebugInfo/X86/nodebug_with_debug_loc.ll
index ba19f23..e4fb062 100644
--- a/test/DebugInfo/X86/nodebug_with_debug_loc.ll
+++ b/test/DebugInfo/X86/nodebug_with_debug_loc.ll
@@ -124,7 +124,7 @@ attributes #3 = { nounwind }
!24 = !{i32 2, !"Debug Info Version", i32 3}
!25 = !{!"clang version 3.5.0 "}
!26 = !MDLocation(line: 15, scope: !11)
-!27 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "lhs", line: 13, arg: 1, scope: !17, file: !12, type: !20, inlinedAt: !28)
+!27 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "lhs", line: 13, arg: 1, scope: !17, file: !12, type: !20)
!28 = !MDLocation(line: 16, scope: !11)
!29 = !MDLocation(line: 13, scope: !17, inlinedAt: !28)
!30 = !MDLocation(line: 17, scope: !11)
diff --git a/test/DebugInfo/X86/nondefault-subrange-array.ll b/test/DebugInfo/X86/nondefault-subrange-array.ll
index f08efc1..172bacb 100644
--- a/test/DebugInfo/X86/nondefault-subrange-array.ll
+++ b/test/DebugInfo/X86/nondefault-subrange-array.ll
@@ -42,11 +42,9 @@
!11 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!12 = !{!13}
!13 = !MDSubrange(count: 42, lowerBound: -3)
-!14 = !MDSubprogram(name: "A", line: 1, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !6, scope: !7, type: !15, variables: !18)
+!14 = !MDSubprogram(name: "A", line: 1, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !6, scope: !7, type: !15)
!15 = !MDSubroutineType(types: !16)
!16 = !{null, !17}
!17 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !7)
-!18 = !{!19}
-!19 = !{} ; previously: invalid DW_TAG_base_type
!20 = !MDFile(filename: "t.cpp", directory: "/Volumes/Sandbox/llvm")
!21 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/nophysreg.ll b/test/DebugInfo/X86/nophysreg.ll
index 65fd494..22394c8 100644
--- a/test/DebugInfo/X86/nophysreg.ll
+++ b/test/DebugInfo/X86/nophysreg.ll
@@ -196,7 +196,7 @@ attributes #3 = { ssp uwtable }
!57 = !MDLocation(line: 23, column: 15, scope: !24)
!58 = !MDLocation(line: 23, column: 7, scope: !24)
!59 = !MDLocation(line: 24, column: 9, scope: !24)
-!60 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "p5", line: 7, arg: 1, scope: !11, file: !12, type: !"_ZTS1A", inlinedAt: !61)
+!60 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "p5", line: 7, arg: 1, scope: !11, file: !12, type: !"_ZTS1A")
!61 = distinct !MDLocation(line: 26, column: 7, scope: !24)
!62 = !MDLocation(line: 7, column: 42, scope: !11, inlinedAt: !61)
!63 = !MDLocation(line: 7, column: 48, scope: !11, inlinedAt: !61)
diff --git a/test/DebugInfo/X86/parameters.ll b/test/DebugInfo/X86/parameters.ll
index a100181..419732f 100644
--- a/test/DebugInfo/X86/parameters.ll
+++ b/test/DebugInfo/X86/parameters.ll
@@ -92,13 +92,12 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!7 = !{!8, !8}
!8 = !MDCompositeType(tag: DW_TAG_structure_type, name: "foo", line: 2, size: 8, align: 8, file: !1, scope: !5, elements: !9)
!9 = !{!10}
-!10 = !MDSubprogram(name: "foo", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !8, type: !11, variables: !16)
+!10 = !MDSubprogram(name: "foo", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !8, type: !11)
!11 = !MDSubroutineType(types: !12)
!12 = !{null, !13, !14}
!13 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !8)
!14 = !MDDerivedType(tag: DW_TAG_reference_type, baseType: !15)
!15 = !MDDerivedType(tag: DW_TAG_const_type, baseType: !8)
-!16 = !{i32 786468}
!17 = !MDSubprogram(name: "func2", linkageName: "_ZN7pr147635func2EbNS_3fooE", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 12, file: !1, scope: !5, type: !18, function: void (i1, %"struct.pr14763::foo"*)* @_ZN7pr147635func2EbNS_3fooE, variables: !2)
!18 = !MDSubroutineType(types: !19)
!19 = !{null, !20, !8}
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
index 8b9f155..aff26f9 100644
--- a/test/DebugInfo/X86/pr11300.ll
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -48,14 +48,10 @@ entry:
!9 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !10)
!10 = !MDCompositeType(tag: DW_TAG_class_type, name: "foo", line: 1, size: 8, align: 8, file: !32, elements: !11)
!11 = !{!12}
-!12 = !MDSubprogram(name: "bar", linkageName: "_ZN3foo3barEv", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !6, scope: !10, type: !13, variables: !16)
+!12 = !MDSubprogram(name: "bar", linkageName: "_ZN3foo3barEv", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !6, scope: !10, type: !13)
!13 = !MDSubroutineType(types: !14)
!14 = !{null, !15}
!15 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, baseType: !10)
-!16 = !{!17}
-!17 = !{} ; previously: invalid DW_TAG_base_type
-!18 = !{!19}
-!19 = !{} ; previously: invalid DW_TAG_base_type
!20 = !MDSubprogram(name: "bar", linkageName: "_ZN3foo3barEv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !6, scope: null, type: !13, function: void (%struct.foo*)* @_ZN3foo3barEv, declaration: !12)
!23 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 4, arg: 1, scope: !5, file: !6, type: !9)
!24 = !MDLocation(line: 4, column: 15, scope: !5)
diff --git a/test/DebugInfo/X86/pr12831.ll b/test/DebugInfo/X86/pr12831.ll
index d552f7b..887110f 100644
--- a/test/DebugInfo/X86/pr12831.ll
+++ b/test/DebugInfo/X86/pr12831.ll
@@ -92,96 +92,70 @@ entry:
!13 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !14)
!14 = !MDCompositeType(tag: DW_TAG_class_type, name: "BPLModuleWriter", line: 12, size: 8, align: 8, file: !160, elements: !15)
!15 = !{!16}
-!16 = !MDSubprogram(name: "writeIntrinsic", linkageName: "_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", line: 13, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !6, scope: !14, type: !17, variables: !101)
+!16 = !MDSubprogram(name: "writeIntrinsic", linkageName: "_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", line: 13, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !6, scope: !14, type: !17)
!17 = !MDSubroutineType(types: !18)
!18 = !{null, !19, !20}
!19 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, baseType: !14)
!20 = !MDCompositeType(tag: DW_TAG_class_type, name: "function<void ()>", line: 6, size: 8, align: 8, file: !160, elements: !21, templateParams: !97)
!21 = !{!22, !51, !58, !86, !92}
-!22 = !MDSubprogram(name: "function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", line: 8, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !6, scope: !20, type: !23, templateParams: !47, variables: !49)
+!22 = !MDSubprogram(name: "function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", line: 8, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !6, scope: !20, type: !23, templateParams: !47)
!23 = !MDSubroutineType(types: !24)
!24 = !{null, !25, !26}
!25 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, baseType: !20)
!26 = !MDCompositeType(tag: DW_TAG_class_type, line: 20, size: 8, align: 8, file: !160, scope: !5, elements: !27)
!27 = !{!28, !35, !41}
-!28 = !MDSubprogram(name: "operator()", line: 20, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 20, file: !6, scope: !26, type: !29, variables: !33)
+!28 = !MDSubprogram(name: "operator()", line: 20, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 20, file: !6, scope: !26, type: !29)
!29 = !MDSubroutineType(types: !30)
!30 = !{null, !31}
!31 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, baseType: !32)
!32 = !MDDerivedType(tag: DW_TAG_const_type, baseType: !26)
-!33 = !{!34}
-!34 = !{} ; previously: invalid DW_TAG_base_type
-!35 = !MDSubprogram(name: "~", line: 20, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 20, file: !6, scope: !26, type: !36, variables: !39)
+!35 = !MDSubprogram(name: "~", line: 20, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 20, file: !6, scope: !26, type: !36)
!36 = !MDSubroutineType(types: !37)
!37 = !{null, !38}
!38 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, baseType: !26)
-!39 = !{!40}
-!40 = !{} ; previously: invalid DW_TAG_base_type
-!41 = !MDSubprogram(name: "", line: 20, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 20, file: !6, scope: !26, type: !42, variables: !45)
+!41 = !MDSubprogram(name: "", line: 20, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 20, file: !6, scope: !26, type: !42)
!42 = !MDSubroutineType(types: !43)
!43 = !{null, !38, !44}
!44 = !MDDerivedType(tag: DW_TAG_rvalue_reference_type, baseType: !26)
-!45 = !{!46}
-!46 = !{} ; previously: invalid DW_TAG_base_type
!47 = !{!48}
!48 = !MDTemplateTypeParameter(name: "_Functor", type: !26)
-!49 = !{!50}
-!50 = !{} ; previously: invalid DW_TAG_base_type
-!51 = !MDSubprogram(name: "function<function<void ()> >", line: 8, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !6, scope: !20, type: !52, templateParams: !54, variables: !56)
+!51 = !MDSubprogram(name: "function<function<void ()> >", line: 8, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !6, scope: !20, type: !52, templateParams: !54)
!52 = !MDSubroutineType(types: !53)
!53 = !{null, !25, !20}
!54 = !{!55}
!55 = !MDTemplateTypeParameter(name: "_Functor", type: !20)
-!56 = !{!57}
-!57 = !{} ; previously: invalid DW_TAG_base_type
-!58 = !MDSubprogram(name: "function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", line: 8, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !6, scope: !20, type: !59, templateParams: !82, variables: !84)
+!58 = !MDSubprogram(name: "function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", line: 8, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !6, scope: !20, type: !59, templateParams: !82)
!59 = !MDSubroutineType(types: !60)
!60 = !{null, !25, !61}
!61 = !MDCompositeType(tag: DW_TAG_class_type, line: 23, size: 8, align: 8, file: !160, scope: !5, elements: !62)
!62 = !{!63, !70, !76}
-!63 = !MDSubprogram(name: "operator()", line: 23, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 23, file: !6, scope: !61, type: !64, variables: !68)
+!63 = !MDSubprogram(name: "operator()", line: 23, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 23, file: !6, scope: !61, type: !64)
!64 = !MDSubroutineType(types: !65)
!65 = !{null, !66}
!66 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, baseType: !67)
!67 = !MDDerivedType(tag: DW_TAG_const_type, baseType: !61)
-!68 = !{!69}
-!69 = !{} ; previously: invalid DW_TAG_base_type
-!70 = !MDSubprogram(name: "~", line: 23, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 23, file: !6, scope: !61, type: !71, variables: !74)
+!70 = !MDSubprogram(name: "~", line: 23, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 23, file: !6, scope: !61, type: !71)
!71 = !MDSubroutineType(types: !72)
!72 = !{null, !73}
!73 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, baseType: !61)
-!74 = !{!75}
-!75 = !{} ; previously: invalid DW_TAG_base_type
-!76 = !MDSubprogram(name: "", line: 23, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 23, file: !6, scope: !61, type: !77, variables: !80)
+!76 = !MDSubprogram(name: "", line: 23, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 23, file: !6, scope: !61, type: !77)
!77 = !MDSubroutineType(types: !78)
!78 = !{null, !73, !79}
!79 = !MDDerivedType(tag: DW_TAG_rvalue_reference_type, baseType: !61)
-!80 = !{!81}
-!81 = !{} ; previously: invalid DW_TAG_base_type
!82 = !{!83}
!83 = !MDTemplateTypeParameter(name: "_Functor", type: !61)
-!84 = !{!85}
-!85 = !{} ; previously: invalid DW_TAG_base_type
-!86 = !MDSubprogram(name: "function", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !6, scope: !20, type: !87, variables: !90)
+!86 = !MDSubprogram(name: "function", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !6, scope: !20, type: !87)
!87 = !MDSubroutineType(types: !88)
!88 = !{null, !25, !89}
!89 = !MDDerivedType(tag: DW_TAG_rvalue_reference_type, baseType: !20)
-!90 = !{!91}
-!91 = !{} ; previously: invalid DW_TAG_base_type
-!92 = !MDSubprogram(name: "~function", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !6, scope: !20, type: !93, variables: !95)
+!92 = !MDSubprogram(name: "~function", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !6, scope: !20, type: !93)
!93 = !MDSubroutineType(types: !94)
!94 = !{null, !25}
-!95 = !{!96}
-!96 = !{} ; previously: invalid DW_TAG_base_type
!97 = !{!98}
!98 = !MDTemplateTypeParameter(name: "T", type: !99)
!99 = !MDSubroutineType(types: !100)
!100 = !{null}
-!101 = !{!102}
-!102 = !{} ; previously: invalid DW_TAG_base_type
-!103 = !MDSubprogram(name: "writeExpr", linkageName: "_ZN17BPLFunctionWriter9writeExprEv", line: 17, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrivate | DIFlagPrototyped, isOptimized: false, scopeLine: 17, file: !6, scope: !10, type: !7, variables: !104)
-!104 = !{!105}
-!105 = !{} ; previously: invalid DW_TAG_base_type
+!103 = !MDSubprogram(name: "writeExpr", linkageName: "_ZN17BPLFunctionWriter9writeExprEv", line: 17, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrivate | DIFlagPrototyped, isOptimized: false, scopeLine: 17, file: !6, scope: !10, type: !7)
!106 = !MDSubprogram(name: "function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", linkageName: "_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", line: 8, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !6, scope: null, type: !59, function: void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", templateParams: !82, declaration: !58, variables: !1)
!107 = !MDSubprogram(name: "_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", linkageName: "_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: null, type: !108, function: void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", templateParams: !111, declaration: !113, variables: !1)
!108 = !MDSubroutineType(types: !109)
@@ -189,19 +163,15 @@ entry:
!110 = !MDDerivedType(tag: DW_TAG_reference_type, baseType: !61)
!111 = !{!112}
!112 = !MDTemplateTypeParameter(name: "_Tp", type: !61)
-!113 = !MDSubprogram(name: "_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", linkageName: "_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: !114, type: !108, templateParams: !111, variables: !124)
+!113 = !MDSubprogram(name: "_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", linkageName: "_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: !114, type: !108, templateParams: !111)
!114 = !MDCompositeType(tag: DW_TAG_class_type, name: "_Base_manager", line: 1, size: 8, align: 8, file: !160, elements: !115)
!115 = !{!116, !113}
-!116 = !MDSubprogram(name: "_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", linkageName: "_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: !114, type: !117, templateParams: !120, variables: !122)
+!116 = !MDSubprogram(name: "_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", linkageName: "_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: !114, type: !117, templateParams: !120)
!117 = !MDSubroutineType(types: !118)
!118 = !{null, !119}
!119 = !MDDerivedType(tag: DW_TAG_reference_type, baseType: !26)
!120 = !{!121}
!121 = !MDTemplateTypeParameter(name: "_Tp", type: !26)
-!122 = !{!123}
-!123 = !{} ; previously: invalid DW_TAG_base_type
-!124 = !{!125}
-!125 = !{} ; previously: invalid DW_TAG_base_type
!126 = !MDSubprogram(name: "function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", linkageName: "_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", line: 8, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !6, scope: null, type: !23, function: void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", templateParams: !47, declaration: !22, variables: !1)
!127 = !MDSubprogram(name: "_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", linkageName: "_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: null, type: !117, function: void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", templateParams: !120, declaration: !116, variables: !1)
!128 = !{!130}
diff --git a/test/DebugInfo/X86/recursive_inlining.ll b/test/DebugInfo/X86/recursive_inlining.ll
index 0d734ee..e20be83 100644
--- a/test/DebugInfo/X86/recursive_inlining.ll
+++ b/test/DebugInfo/X86/recursive_inlining.ll
@@ -236,7 +236,7 @@ attributes #3 = { nounwind }
!34 = !{!"any pointer", !35, i64 0}
!35 = !{!"omnipotent char", !36, i64 0}
!36 = !{!"Simple C/C++ TBAA"}
-!37 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25, inlinedAt: !32)
+!37 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25)
!38 = !MDLocation(line: 0, scope: !22, inlinedAt: !32)
!39 = !MDLocation(line: 8, scope: !22, inlinedAt: !32)
!40 = !MDLocation(line: 9, scope: !41, inlinedAt: !32)
@@ -256,7 +256,7 @@ attributes #3 = { nounwind }
!54 = !MDLocation(line: 20, scope: !18, inlinedAt: !55)
!55 = !MDLocation(line: 10, scope: !22)
!56 = !MDLocation(line: 17, scope: !14, inlinedAt: !54)
-!57 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25, inlinedAt: !56)
+!57 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25)
!58 = !MDLocation(line: 0, scope: !22, inlinedAt: !56)
!59 = !MDLocation(line: 8, scope: !22, inlinedAt: !56)
!60 = !MDLocation(line: 9, scope: !41, inlinedAt: !56)
@@ -266,7 +266,7 @@ attributes #3 = { nounwind }
!64 = !MDLocation(line: 16, scope: !14, inlinedAt: !65)
!65 = !MDLocation(line: 20, scope: !18)
!66 = !MDLocation(line: 17, scope: !14, inlinedAt: !65)
-!67 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25, inlinedAt: !66)
+!67 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25)
!68 = !MDLocation(line: 0, scope: !22, inlinedAt: !66)
!69 = !MDLocation(line: 8, scope: !22, inlinedAt: !66)
!70 = !MDLocation(line: 9, scope: !41, inlinedAt: !66)
diff --git a/test/DebugInfo/X86/reference-argument.ll b/test/DebugInfo/X86/reference-argument.ll
index 20d83ed..a4e5aba 100644
--- a/test/DebugInfo/X86/reference-argument.ll
+++ b/test/DebugInfo/X86/reference-argument.ll
@@ -48,19 +48,16 @@ declare void @_ZN4SValD2Ev(%class.SVal* %this)
!13 = !MDDerivedType(tag: DW_TAG_const_type, baseType: null)
!14 = !MDDerivedType(tag: DW_TAG_member, name: "Kind", line: 16, size: 32, align: 32, offset: 64, file: !1, scope: !9, baseType: !15)
!15 = !MDBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
-!16 = !MDSubprogram(name: "~SVal", line: 14, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 14, file: !1, scope: !9, type: !17, variables: !20)
+!16 = !MDSubprogram(name: "~SVal", line: 14, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 14, file: !1, scope: !9, type: !17)
!17 = !MDSubroutineType(types: !18)
!18 = !{null, !19}
!19 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !9)
-!20 = !{i32 786468}
-!21 = !MDSubprogram(name: "SVal", line: 12, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 12, file: !1, scope: !9, type: !17, variables: !22)
-!22 = !{i32 786468}
-!23 = !MDSubprogram(name: "SVal", line: 12, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 12, file: !1, scope: !9, type: !24, variables: !28)
+!21 = !MDSubprogram(name: "SVal", line: 12, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 12, file: !1, scope: !9, type: !17)
+!23 = !MDSubprogram(name: "SVal", line: 12, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 12, file: !1, scope: !9, type: !24)
!24 = !MDSubroutineType(types: !25)
!25 = !{null, !19, !26}
!26 = !MDDerivedType(tag: DW_TAG_reference_type, baseType: !27)
!27 = !MDDerivedType(tag: DW_TAG_const_type, baseType: !9)
-!28 = !{i32 786468}
!29 = !MDSubprogram(name: "main", line: 25, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 25, file: !1, scope: !5, type: !30, function: i32 ()* @main, variables: !2)
!30 = !MDSubroutineType(types: !31)
!31 = !{!32}
@@ -73,12 +70,10 @@ declare void @_ZN4SValD2Ev(%class.SVal* %this)
!38 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !39)
!39 = !MDCompositeType(tag: DW_TAG_class_type, name: "A", line: 20, size: 8, align: 8, file: !1, elements: !40)
!40 = !{!41, !43}
-!41 = !MDSubprogram(name: "foo", linkageName: "_ZN1A3fooE4SVal", line: 22, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 22, file: !1, scope: !39, type: !36, variables: !42)
-!42 = !{i32 786468}
-!43 = !MDSubprogram(name: "A", line: 20, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 20, file: !1, scope: !39, type: !44, variables: !46)
+!41 = !MDSubprogram(name: "foo", linkageName: "_ZN1A3fooE4SVal", line: 22, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 22, file: !1, scope: !39, type: !36)
+!43 = !MDSubprogram(name: "A", line: 20, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 20, file: !1, scope: !39, type: !44)
!44 = !MDSubroutineType(types: !45)
!45 = !{null, !38}
-!46 = !{i32 786468}
!47 = !{i32 2, !"Dwarf Version", i32 3}
!48 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "v", line: 19, arg: 1, scope: !4, file: !5, type: !8)
!49 = !MDLocation(line: 19, scope: !4)
diff --git a/test/DebugInfo/X86/rvalue-ref.ll b/test/DebugInfo/X86/rvalue-ref.ll
index daf58c1..c6f0007 100644
--- a/test/DebugInfo/X86/rvalue-ref.ll
+++ b/test/DebugInfo/X86/rvalue-ref.ll
@@ -12,7 +12,7 @@ entry:
call void @llvm.dbg.declare(metadata i32** %i.addr, metadata !11, metadata !MDExpression()), !dbg !12
%0 = load i32*, i32** %i.addr, align 8, !dbg !13
%1 = load i32, i32* %0, align 4, !dbg !13
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %1), !dbg !13
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %1), !dbg !13
ret void, !dbg !15
}
diff --git a/test/DebugInfo/X86/sroasplit-1.ll b/test/DebugInfo/X86/sroasplit-1.ll
index 6471106..a25b8b2 100644
--- a/test/DebugInfo/X86/sroasplit-1.ll
+++ b/test/DebugInfo/X86/sroasplit-1.ll
@@ -65,11 +65,11 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!22, !23}
!llvm.ident = !{!24}
-!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !{}, retainedTypes: !{}, subprograms: !3, globals: !{}, imports: !{})
!1 = !MDFile(filename: "sroasplit-1.c", directory: "")
!2 = !MDExpression()
!3 = !{!4}
-!4 = !MDSubprogram(name: "foo", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, function: i32 (%struct.Outer*)* @foo, variables: !2)
+!4 = !MDSubprogram(name: "foo", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, function: i32 (%struct.Outer*)* @foo, variables: !{})
!5 = !MDFile(filename: "sroasplit-1.c", directory: "")
!6 = !MDSubroutineType(types: !7)
!7 = !{!8, !9}
diff --git a/test/DebugInfo/X86/sroasplit-2.ll b/test/DebugInfo/X86/sroasplit-2.ll
index 4bf634b..1a80f3e 100644
--- a/test/DebugInfo/X86/sroasplit-2.ll
+++ b/test/DebugInfo/X86/sroasplit-2.ll
@@ -71,11 +71,11 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!21, !22}
!llvm.ident = !{!23}
-!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !{}, retainedTypes: !{}, subprograms: !3, globals: !{}, imports: !{})
!1 = !MDFile(filename: "sroasplit-2.c", directory: "")
!2 = !MDExpression()
!3 = !{!4}
-!4 = !MDSubprogram(name: "foo", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, function: i32 (i64, i64)* @foo, variables: !2)
+!4 = !MDSubprogram(name: "foo", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, function: i32 (i64, i64)* @foo, variables: !{})
!5 = !MDFile(filename: "sroasplit-2.c", directory: "")
!6 = !MDSubroutineType(types: !7)
!7 = !{!8, !9}
diff --git a/test/DebugInfo/X86/stmt-list.ll b/test/DebugInfo/X86/stmt-list.ll
index aac583c..9bf9edc 100644
--- a/test/DebugInfo/X86/stmt-list.ll
+++ b/test/DebugInfo/X86/stmt-list.ll
@@ -16,7 +16,7 @@ entry:
!0 = !MDSubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !6, scope: !1, type: !3, function: void ()* @f)
!1 = !MDFile(filename: "test2.c", directory: "/home/espindola/llvm")
-!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !6, enums: !4, retainedTypes: !4, subprograms: !5)
+!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !6, enums: !{}, retainedTypes: !{}, subprograms: !5)
!3 = !MDSubroutineType(types: !4)
!4 = !{null}
!6 = !MDFile(filename: "test2.c", directory: "/home/espindola/llvm")
diff --git a/test/DebugInfo/X86/subreg.ll b/test/DebugInfo/X86/subreg.ll
index 8632c58..ce3870f 100644
--- a/test/DebugInfo/X86/subreg.ll
+++ b/test/DebugInfo/X86/subreg.ll
@@ -9,7 +9,7 @@
define i16 @f(i16 signext %zzz) nounwind {
entry:
- call void @llvm.dbg.value(metadata i16 %zzz, i64 0, metadata !0, metadata !MDExpression())
+ call void @llvm.dbg.value(metadata i16 %zzz, i64 0, metadata !0, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
%conv = sext i16 %zzz to i32, !dbg !7
%conv1 = trunc i32 %conv to i16
ret i16 %conv1
@@ -24,7 +24,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!0 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "zzz", line: 3, arg: 1, scope: !1, file: !2, type: !6)
!1 = !MDSubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !10, scope: !2, type: !4, function: i16 (i16)* @f)
!2 = !MDFile(filename: "/home/espindola/llvm/test.c", directory: "/home/espindola/tmpfs/build")
-!3 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: false, emissionKind: 1, file: !10, enums: !5, retainedTypes: !5, subprograms: !9, imports: null)
+!3 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: false, emissionKind: 1, file: !10, enums: !{}, retainedTypes: !{}, subprograms: !9, imports: null)
!4 = !MDSubroutineType(types: !5)
!5 = !{null}
!6 = !MDBasicType(tag: DW_TAG_base_type, name: "short", size: 16, align: 16, encoding: DW_ATE_signed)
diff --git a/test/DebugInfo/X86/subregisters.ll b/test/DebugInfo/X86/subregisters.ll
index 5e71d05..41691d2 100644
--- a/test/DebugInfo/X86/subregisters.ll
+++ b/test/DebugInfo/X86/subregisters.ll
@@ -44,7 +44,7 @@ entry:
%a1 = getelementptr inbounds %struct.bar, %struct.bar* %b, i64 0, i32 0, !dbg !26
%0 = load i32, i32* %a1, align 4, !dbg !26, !tbaa !27
tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !16, metadata !MDExpression()), !dbg !26
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %0) #4, !dbg !32
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %0) #4, !dbg !32
ret void, !dbg !33
}
diff --git a/test/DebugInfo/X86/union-template.ll b/test/DebugInfo/X86/union-template.ll
index dfc96be..dfbc1c6 100644
--- a/test/DebugInfo/X86/union-template.ll
+++ b/test/DebugInfo/X86/union-template.ll
@@ -45,11 +45,10 @@ attributes #1 = { nounwind readnone }
!13 = !{!14, !16}
!14 = !MDDerivedType(tag: DW_TAG_member, name: "a", line: 2, size: 32, align: 32, file: !1, scope: !12, baseType: !15)
!15 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!16 = !MDSubprogram(name: "Value", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !12, type: !17, variables: !20)
+!16 = !MDSubprogram(name: "Value", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !12, type: !17)
!17 = !MDSubroutineType(types: !18)
!18 = !{null, !19}
!19 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !12)
-!20 = !{i32 786468}
!21 = !{!22}
!22 = !MDTemplateTypeParameter(name: "T", type: !8)
!23 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 3, arg: 1, scope: !4, file: !11, type: !8)
diff --git a/test/DebugInfo/array.ll b/test/DebugInfo/array.ll
index 119ffd3..6701177 100644
--- a/test/DebugInfo/array.ll
+++ b/test/DebugInfo/array.ll
@@ -36,5 +36,5 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!12 = !MDLocation(line: 5, column: 3, scope: !7)
!13 = !{!0}
!14 = !MDFile(filename: "array.c", directory: "/private/tmp")
-!15 = !{i32 0}
+!15 = !{}
!16 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/cross-cu-inlining.ll b/test/DebugInfo/cross-cu-inlining.ll
index d4d3d21..31f49e9 100644
--- a/test/DebugInfo/cross-cu-inlining.ll
+++ b/test/DebugInfo/cross-cu-inlining.ll
@@ -75,7 +75,7 @@ entry:
%1 = bitcast i32* %x.addr.i to i8*
call void @llvm.lifetime.start(i64 4, i8* %1)
store i32 %0, i32* %x.addr.i, align 4
- call void @llvm.dbg.declare(metadata i32* %x.addr.i, metadata !20, metadata !MDExpression()), !dbg !21
+ call void @llvm.dbg.declare(metadata i32* %x.addr.i, metadata !120, metadata !MDExpression()), !dbg !21
%2 = load i32, i32* %x.addr.i, align 4, !dbg !22
%mul.i = mul nsw i32 %2, 2, !dbg !22
%3 = bitcast i32* %x.addr.i to i8*, !dbg !22
@@ -133,6 +133,9 @@ attributes #3 = { nounwind }
!18 = !{!"clang version 3.5.0 "}
!19 = !MDLocation(line: 4, scope: !4)
!20 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 1, arg: 1, scope: !12, file: !13, type: !8)
+
+!120 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 1, arg: 1, scope: !12, file: !13, type: !8)
+
!21 = !MDLocation(line: 1, scope: !12, inlinedAt: !19)
!22 = !MDLocation(line: 2, scope: !12, inlinedAt: !19)
!23 = !MDLocation(line: 1, scope: !12)
diff --git a/test/DebugInfo/debug-info-qualifiers.ll b/test/DebugInfo/debug-info-qualifiers.ll
index 1bfe367..96be71e 100644
--- a/test/DebugInfo/debug-info-qualifiers.ll
+++ b/test/DebugInfo/debug-info-qualifiers.ll
@@ -68,15 +68,13 @@ attributes #1 = { nounwind readnone }
!4 = !MDCompositeType(tag: DW_TAG_class_type, name: "A", line: 2, size: 8, align: 8, file: !5, elements: !6, identifier: "_ZTS1A")
!5 = !MDFile(filename: "debug-info-qualifiers.cpp", directory: "")
!6 = !{!7, !13}
-!7 = !MDSubprogram(name: "l", linkageName: "_ZNKR1A1lEv", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped | DIFlagLValueReference, isOptimized: false, scopeLine: 5, file: !5, scope: !"_ZTS1A", type: !8, variables: !12)
+!7 = !MDSubprogram(name: "l", linkageName: "_ZNKR1A1lEv", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped | DIFlagLValueReference, isOptimized: false, scopeLine: 5, file: !5, scope: !"_ZTS1A", type: !8)
!8 = !MDSubroutineType(flags: DIFlagLValueReference, types: !9)
!9 = !{null, !10}
!10 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !11)
!11 = !MDDerivedType(tag: DW_TAG_const_type, baseType: !"_ZTS1A")
-!12 = !{i32 786468}
-!13 = !MDSubprogram(name: "r", linkageName: "_ZNKO1A1rEv", line: 7, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagObjectPointer | DIFlagRValueReference, isOptimized: false, scopeLine: 7, file: !5, scope: !"_ZTS1A", type: !14, variables: !15)
+!13 = !MDSubprogram(name: "r", linkageName: "_ZNKO1A1rEv", line: 7, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagObjectPointer | DIFlagRValueReference, isOptimized: false, scopeLine: 7, file: !5, scope: !"_ZTS1A", type: !14)
!14 = !MDSubroutineType(flags: DIFlagRValueReference, types: !9)
-!15 = !{i32 786468}
!16 = !{!17}
!17 = !MDSubprogram(name: "g", linkageName: "_Z1gv", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !5, scope: !18, type: !19, function: void ()* @_Z1gv, variables: !2)
!18 = !MDFile(filename: "debug-info-qualifiers.cpp", directory: "")
diff --git a/test/DebugInfo/debuginfofinder-forward-declaration.ll b/test/DebugInfo/debuginfofinder-forward-declaration.ll
new file mode 100644
index 0000000..fc5d294
--- /dev/null
+++ b/test/DebugInfo/debuginfofinder-forward-declaration.ll
@@ -0,0 +1,42 @@
+; RUN: opt -analyze -module-debuginfo < %s | FileCheck %s
+
+
+; This module is generated from the following c-code:
+;
+; > union X;
+; >
+; > struct Y {
+; > union X *x;
+; > };
+; >
+; > struct Y y;
+
+
+; CHECK: Type: Y from /tmp/minimal.c:3 DW_TAG_structure_type
+; CHECK: Type: x from /tmp/minimal.c:4 DW_TAG_member
+; CHECK: Type: DW_TAG_pointer_type
+; CHECK: Type: X from /tmp/minimal.c:1 DW_TAG_structure_type
+
+
+%struct.Y = type { %struct.X* }
+%struct.X = type opaque
+
+@y = common global %struct.Y zeroinitializer, align 8
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.ident = !{!12}
+
+!0 = !MDCompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (http://llvm.org/git/clang.git 247b30a043eb8f39ea3708e7e995089da0a6b00f) (http://llvm.org/git/llvm.git 6ecc7365a89c771fd229bdd9ffcc178684ea1aa5)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!1 = !MDFile(filename: "minimal.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = !MDGlobalVariable(name: "y", scope: !0, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, variable: %struct.Y* @y)
+!5 = !MDCompositeType(tag: DW_TAG_structure_type, name: "Y", file: !1, line: 3, size: 64, align: 64, elements: !6)
+!6 = !{!7}
+!7 = !MDDerivedType(tag: DW_TAG_member, name: "x", scope: !5, file: !1, line: 4, baseType: !8, size: 64, align: 64)
+!8 = !MDDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 64)
+!9 = !MDCompositeType(tag: DW_TAG_structure_type, name: "X", file: !1, line: 1, flags: DIFlagFwdDecl)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{!"clang version 3.7.0 (http://llvm.org/git/clang.git 247b30a043eb8f39ea3708e7e995089da0a6b00f) (http://llvm.org/git/llvm.git 6ecc7365a89c771fd229bdd9ffcc178684ea1aa5)"}
diff --git a/test/DebugInfo/debuginfofinder-multiple-cu.ll b/test/DebugInfo/debuginfofinder-multiple-cu.ll
index e05668c..d881d43 100644
--- a/test/DebugInfo/debuginfofinder-multiple-cu.ll
+++ b/test/DebugInfo/debuginfofinder-multiple-cu.ll
@@ -24,7 +24,7 @@ define void @g() {
!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (192092)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !MDFile(filename: "test1.c", directory: "/tmp")
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!4}
!4 = !MDSubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void ()* @f, variables: !2)
!5 = !MDFile(filename: "test1.c", directory: "/tmp")
diff --git a/test/DebugInfo/inline-debug-info-multiret.ll b/test/DebugInfo/inline-debug-info-multiret.ll
index beec6d5..b4ab0b2 100644
--- a/test/DebugInfo/inline-debug-info-multiret.ll
+++ b/test/DebugInfo/inline-debug-info-multiret.ll
@@ -124,7 +124,7 @@ attributes #2 = { nounwind }
!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !MDFile(filename: "<unknown>", directory: "")
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!4, !10}
!4 = !MDSubprogram(name: "test", linkageName: "_Z4testi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !6, type: !7, function: i32 (i32)* @_Z4testi, variables: !2)
!5 = !MDFile(filename: "test.cpp", directory: "")
diff --git a/test/DebugInfo/inline-debug-info.ll b/test/DebugInfo/inline-debug-info.ll
index 3127a38..f1f04ff 100644
--- a/test/DebugInfo/inline-debug-info.ll
+++ b/test/DebugInfo/inline-debug-info.ll
@@ -142,7 +142,7 @@ attributes #2 = { nounwind }
!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !MDFile(filename: "<unknown>", directory: "")
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!4, !10}
!4 = !MDSubprogram(name: "test", linkageName: "_Z4testi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !6, type: !7, function: i32 (i32)* @_Z4testi, variables: !2)
!5 = !MDFile(filename: "test.cpp", directory: "")
diff --git a/test/DebugInfo/inlined-arguments.ll b/test/DebugInfo/inlined-arguments.ll
index 5886333..5d416f7 100644
--- a/test/DebugInfo/inlined-arguments.ll
+++ b/test/DebugInfo/inlined-arguments.ll
@@ -66,11 +66,11 @@ attributes #2 = { nounwind readnone }
!13 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 6, arg: 1, scope: !8, file: !5, type: !11)
!14 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 6, arg: 2, scope: !8, file: !5, type: !11)
!15 = !{i32 undef}
-!16 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 6, arg: 1, scope: !8, file: !5, type: !11, inlinedAt: !17)
+!16 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 6, arg: 1, scope: !8, file: !5, type: !11)
!17 = !MDLocation(line: 4, scope: !4)
!18 = !MDLocation(line: 6, scope: !8, inlinedAt: !17)
!19 = !{i32 2}
-!20 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 6, arg: 2, scope: !8, file: !5, type: !11, inlinedAt: !17)
+!20 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 6, arg: 2, scope: !8, file: !5, type: !11)
!21 = !MDLocation(line: 7, scope: !8, inlinedAt: !17)
!22 = !MDLocation(line: 5, scope: !4)
!23 = !MDLocation(line: 6, scope: !8)
diff --git a/test/DebugInfo/inlined-vars.ll b/test/DebugInfo/inlined-vars.ll
index 8e2ee3a..2cf59cd 100644
--- a/test/DebugInfo/inlined-vars.ll
+++ b/test/DebugInfo/inlined-vars.ll
@@ -45,10 +45,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
; VARIABLE-NOT: {{.*Abbrev.*DW_TAG_variable}}
-!18 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "argument", line: 3, arg: 1, scope: !10, file: !6, type: !9, inlinedAt: !19)
+!18 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "argument", line: 3, arg: 1, scope: !10, file: !6, type: !9)
!19 = !MDLocation(line: 11, column: 10, scope: !5)
!21 = !MDLocation(line: 3, column: 25, scope: !10, inlinedAt: !19)
-!22 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "local", line: 4, scope: !10, file: !6, type: !9, inlinedAt: !19)
+!22 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "local", line: 4, scope: !10, file: !6, type: !9)
!23 = !MDLocation(line: 4, column: 16, scope: !10, inlinedAt: !19)
!24 = !MDLocation(line: 5, column: 3, scope: !10, inlinedAt: !19)
!25 = !MDLocation(line: 6, column: 3, scope: !10, inlinedAt: !19)
diff --git a/test/DebugInfo/member-order.ll b/test/DebugInfo/member-order.ll
index d57db12..0e1397c 100644
--- a/test/DebugInfo/member-order.ll
+++ b/test/DebugInfo/member-order.ll
@@ -49,12 +49,12 @@ attributes #1 = { nounwind readnone }
!3 = !{!4}
!4 = !MDCompositeType(tag: DW_TAG_structure_type, name: "foo", line: 1, size: 8, align: 8, file: !1, elements: !5, identifier: "_ZTS3foo")
!5 = !{!6, !11}
-!6 = !MDSubprogram(name: "f1", linkageName: "_ZN3foo2f1Ev", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !4, type: !7, variables: !10)
+!6 = !MDSubprogram(name: "f1", linkageName: "_ZN3foo2f1Ev", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !4, type: !7)
!7 = !MDSubroutineType(types: !8)
!8 = !{null, !9}
!9 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS3foo")
!10 = !{i32 786468}
-!11 = !MDSubprogram(name: "f2", linkageName: "_ZN3foo2f2Ev", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !4, type: !7, variables: !12)
+!11 = !MDSubprogram(name: "f2", linkageName: "_ZN3foo2f2Ev", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !4, type: !7)
!12 = !{i32 786468}
!13 = !{!14}
!14 = !MDSubprogram(name: "f1", linkageName: "_ZN3foo2f1Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: null, type: !7, function: void (%struct.foo*)* @_ZN3foo2f1Ev, declaration: !6, variables: !2)
diff --git a/test/DebugInfo/missing-abstract-variable.ll b/test/DebugInfo/missing-abstract-variable.ll
index 84123cf..a26afb7 100644
--- a/test/DebugInfo/missing-abstract-variable.ll
+++ b/test/DebugInfo/missing-abstract-variable.ll
@@ -160,13 +160,13 @@ attributes #2 = { nounwind readnone }
!22 = !{i32 2, !"Debug Info Version", i32 3}
!23 = !{!"clang version 3.5.0 "}
!24 = !{i1 false}
-!25 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 5, arg: 1, scope: !14, file: !5, type: !11, inlinedAt: !26)
+!25 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 5, arg: 1, scope: !14, file: !5, type: !11)
!26 = !MDLocation(line: 14, scope: !4)
!27 = !MDLocation(line: 5, scope: !14, inlinedAt: !26)
!28 = !MDLocation(line: 10, scope: !14, inlinedAt: !26)
!29 = !MDLocation(line: 15, scope: !4)
!30 = !MDLocation(line: 17, scope: !8)
-!31 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 5, arg: 1, scope: !14, file: !5, type: !11, inlinedAt: !32)
+!31 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 5, arg: 1, scope: !14, file: !5, type: !11)
!32 = !MDLocation(line: 18, scope: !8)
!33 = !MDLocation(line: 5, scope: !14, inlinedAt: !32)
!34 = !MDLocation(line: 6, scope: !19, inlinedAt: !32)
@@ -175,7 +175,7 @@ attributes #2 = { nounwind readnone }
!37 = !{!"int", !38, i64 0}
!38 = !{!"omnipotent char", !39, i64 0}
!39 = !{!"Simple C/C++ TBAA"}
-!40 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "s", line: 7, scope: !18, file: !5, type: !20, inlinedAt: !32)
+!40 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "s", line: 7, scope: !18, file: !5, type: !20)
!41 = !MDLocation(line: 8, scope: !18, inlinedAt: !32)
!42 = !MDLocation(line: 9, scope: !18, inlinedAt: !32)
!43 = !MDLocation(line: 10, scope: !14, inlinedAt: !32)
diff --git a/test/DebugInfo/multiline.ll b/test/DebugInfo/multiline.ll
index d31cc95..1fa5979 100644
--- a/test/DebugInfo/multiline.ll
+++ b/test/DebugInfo/multiline.ll
@@ -44,12 +44,12 @@
; Function Attrs: nounwind uwtable
define void @f2() #0 {
entry:
- call void (...)* @f1(), !dbg !11
- call void (...)* @f1(), !dbg !12
- call void (...)* @f1(), !dbg !13
- call void (...)* @f1(), !dbg !14
- call void (...)* @f1(), !dbg !15
- call void (...)* @f1(), !dbg !16
+ call void (...) @f1(), !dbg !11
+ call void (...) @f1(), !dbg !12
+ call void (...) @f1(), !dbg !13
+ call void (...) @f1(), !dbg !14
+ call void (...) @f1(), !dbg !15
+ call void (...) @f1(), !dbg !16
ret void, !dbg !17
}
diff --git a/test/DebugInfo/namespace_inline_function_definition.ll b/test/DebugInfo/namespace_inline_function_definition.ll
index 109994f..8628b3d 100644
--- a/test/DebugInfo/namespace_inline_function_definition.ll
+++ b/test/DebugInfo/namespace_inline_function_definition.ll
@@ -42,7 +42,7 @@ entry:
store i32 0, i32* %retval
%0 = load i32, i32* @x, align 4, !dbg !16
store i32 %0, i32* %i.addr.i, align 4
- call void @llvm.dbg.declare(metadata i32* %i.addr.i, metadata !17, metadata !MDExpression()), !dbg !18
+ call void @llvm.dbg.declare(metadata i32* %i.addr.i, metadata !117, metadata !MDExpression()), !dbg !18
%1 = load i32, i32* %i.addr.i, align 4, !dbg !18
%mul.i = mul nsw i32 %1, 2, !dbg !18
ret i32 %mul.i, !dbg !16
@@ -88,5 +88,8 @@ attributes #2 = { nounwind readnone }
!15 = !{!"clang version 3.5.0 "}
!16 = !MDLocation(line: 5, scope: !4)
!17 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 6, arg: 1, scope: !9, file: !5, type: !8)
+
+!117 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 6, arg: 1, scope: !9, file: !5, type: !8)
+
!18 = !MDLocation(line: 6, scope: !9, inlinedAt: !16)
!19 = !MDLocation(line: 6, scope: !9)
diff --git a/test/DebugInfo/template-recursive-void.ll b/test/DebugInfo/template-recursive-void.ll
index d6b3b8d..d33b8da 100644
--- a/test/DebugInfo/template-recursive-void.ll
+++ b/test/DebugInfo/template-recursive-void.ll
@@ -39,27 +39,23 @@
!11 = !MDDerivedType(tag: DW_TAG_inheritance, scope: !9, baseType: !12)
!12 = !MDCompositeType(tag: DW_TAG_class_type, name: "base", line: 3, size: 8, align: 8, file: !1, elements: !13)
!13 = !{!14}
-!14 = !MDSubprogram(name: "base", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !12, type: !15, variables: !18)
+!14 = !MDSubprogram(name: "base", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !12, type: !15)
!15 = !MDSubroutineType(types: !16)
!16 = !{null, !17}
!17 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !12)
-!18 = !{i32 786468}
-!19 = !MDSubprogram(name: "operator=", linkageName: "_ZN3fooIvEaSES0_", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrivate | DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !9, type: !20, variables: !24)
+!19 = !MDSubprogram(name: "operator=", linkageName: "_ZN3fooIvEaSES0_", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrivate | DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !9, type: !20)
!20 = !MDSubroutineType(types: !21)
!21 = !{null, !22, !23}
!22 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !9)
!23 = !MDDerivedType(tag: DW_TAG_const_type, baseType: !9)
-!24 = !{i32 786468}
-!25 = !MDSubprogram(name: "foo", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !9, type: !26, variables: !28)
+!25 = !MDSubprogram(name: "foo", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !9, type: !26)
!26 = !MDSubroutineType(types: !27)
!27 = !{null, !22}
-!28 = !{i32 786468}
!29 = !{!30}
!30 = !MDTemplateTypeParameter(name: "T", type: null)
-!31 = !MDSubprogram(name: "bar", line: 9, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !1, scope: !6, type: !32, variables: !35)
+!31 = !MDSubprogram(name: "bar", line: 9, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !1, scope: !6, type: !32)
!32 = !MDSubroutineType(types: !33)
!33 = !{null, !34}
!34 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !6)
-!35 = !{i32 786468}
!36 = !{i32 2, !"Dwarf Version", i32 3}
!37 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/tu-composite.ll b/test/DebugInfo/tu-composite.ll
index 5f2ced5..a5f25fb 100644
--- a/test/DebugInfo/tu-composite.ll
+++ b/test/DebugInfo/tu-composite.ll
@@ -136,11 +136,10 @@ attributes #1 = { nounwind readnone }
!10 = !MDSubroutineType(types: !11)
!11 = !{!12}
!12 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!13 = !MDSubprogram(name: "foo", linkageName: "_ZN1C3fooEv", line: 2, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !"_ZTS1C", type: !14, containingType: !"_ZTS1C", variables: !17)
+!13 = !MDSubprogram(name: "foo", linkageName: "_ZN1C3fooEv", line: 2, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !"_ZTS1C", type: !14, containingType: !"_ZTS1C")
!14 = !MDSubroutineType(types: !15)
!15 = !{null, !16}
!16 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1C")
-!17 = !{i32 786468}
!18 = !MDCompositeType(tag: DW_TAG_structure_type, name: "bar", line: 7, size: 8, align: 8, file: !1, elements: !2, identifier: "_ZTS3bar")
!19 = !MDCompositeType(tag: DW_TAG_structure_type, name: "D", line: 9, size: 8, align: 8, file: !1, elements: !20, identifier: "_ZTS1D")
!20 = !{!21}
diff --git a/test/DebugInfo/varargs.ll b/test/DebugInfo/varargs.ll
index 3172c5e..8bdfef5 100644
--- a/test/DebugInfo/varargs.ll
+++ b/test/DebugInfo/varargs.ll
@@ -78,12 +78,11 @@ attributes #1 = { nounwind readnone }
!3 = !{!4}
!4 = !MDCompositeType(tag: DW_TAG_structure_type, name: "A", line: 3, size: 8, align: 8, file: !1, elements: !5, identifier: "_ZTS1A")
!5 = !{!6}
-!6 = !MDSubprogram(name: "a", linkageName: "_ZN1A1aEiz", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !"_ZTS1A", type: !7, variables: !12)
+!6 = !MDSubprogram(name: "a", linkageName: "_ZN1A1aEiz", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !"_ZTS1A", type: !7)
!7 = !MDSubroutineType(types: !8)
!8 = !{null, !9, !10, null}
!9 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
!10 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!12 = !{i32 786468}
!13 = !{!14}
!14 = !MDSubprogram(name: "b", linkageName: "_Z1biz", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !1, scope: !15, type: !16, function: void (i32, ...)* @_Z1biz, variables: !2)
!15 = !MDFile(filename: "llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp", directory: "radar/13690847")
diff --git a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
index 1ddc5ce..7b1cb16 100644
--- a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
+++ b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
@@ -13,7 +13,7 @@ declare i32 @printf(i8*, ...)
define i32 @main(i32 %argc, i8** %argv) {
bb0:
- call i32 (i8*, ...)* @printf( i8* getelementptr ([10 x i8], [10 x i8]* @.LC0, i64 0, i64 0), i32 %argc ) ; <i32>:0 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* getelementptr ([10 x i8], [10 x i8]* @.LC0, i64 0, i64 0), i32 %argc ) ; <i32>:0 [#uses=0]
%cast224 = bitcast i8** %argv to i8* ; <i8*> [#uses=1]
%local = alloca i8* ; <i8**> [#uses=3]
store i8* %cast224, i8** %local
diff --git a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
index 4e4ad2a..c0a5d3b 100644
--- a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
@@ -48,7 +48,7 @@ cond_false: ; preds = %entry
cond_next: ; preds = %cond_false, %cond_true
%tmp5 = getelementptr [10 x i8], [10 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
%tmp6 = load i32, i32* %iftmp.0, align 4 ; <i32> [#uses=1]
- %tmp7 = call i32 (i8*, ...)* @printf( i8* noalias %tmp5, i32 %tmp6 ) nounwind ; <i32> [#uses=0]
+ %tmp7 = call i32 (i8*, ...) @printf( i8* noalias %tmp5, i32 %tmp6 ) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %cond_next
diff --git a/test/ExecutionEngine/MCJIT/fpbitcast.ll b/test/ExecutionEngine/MCJIT/fpbitcast.ll
index bd7beb8..670c142 100644
--- a/test/ExecutionEngine/MCJIT/fpbitcast.ll
+++ b/test/ExecutionEngine/MCJIT/fpbitcast.ll
@@ -13,7 +13,7 @@ define i32 @main()
{
%res = call i32 @test(double 3.14)
%ptr = getelementptr [4 x i8], [4 x i8]* @format, i32 0, i32 0
- call i32 (i8*,...)* @printf(i8* %ptr, i32 %res)
+ call i32 (i8*,...) @printf(i8* %ptr, i32 %res)
ret i32 0
}
diff --git a/test/ExecutionEngine/MCJIT/hello-sm-pic.ll b/test/ExecutionEngine/MCJIT/hello-sm-pic.ll
deleted file mode 100644
index 4843f4e..0000000
--- a/test/ExecutionEngine/MCJIT/hello-sm-pic.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: %lli -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, i686, i386, darwin, aarch64, arm
-
-@.LC0 = internal global [12 x i8] c"Hello World\00" ; <[12 x i8]*> [#uses=1]
-
-declare i32 @puts(i8*)
-
-define i32 @main() {
- %reg210 = call i32 @puts( i8* getelementptr ([12 x i8], [12 x i8]* @.LC0, i64 0, i64 0) ) ; <i32> [#uses=0]
- ret i32 0
-}
-
diff --git a/test/ExecutionEngine/MCJIT/hello2.ll b/test/ExecutionEngine/MCJIT/hello2.ll
index 13b2588..31a1a6e 100644
--- a/test/ExecutionEngine/MCJIT/hello2.ll
+++ b/test/ExecutionEngine/MCJIT/hello2.ll
@@ -6,7 +6,7 @@
declare void @printf([13 x i8]*, ...)
define void @bar() {
- call void ([13 x i8]*, ...)* @printf( [13 x i8]* @msg )
+ call void ([13 x i8]*, ...) @printf( [13 x i8]* @msg )
ret void
}
diff --git a/test/ExecutionEngine/OrcLazy/anonymous_globals.ll b/test/ExecutionEngine/OrcLazy/anonymous_globals.ll
new file mode 100644
index 0000000..c4c09a0
--- /dev/null
+++ b/test/ExecutionEngine/OrcLazy/anonymous_globals.ll
@@ -0,0 +1,18 @@
+; RUN: lli -jit-kind=orc-lazy %s
+
+define private void @0() {
+entry:
+ ret void
+}
+
+define private void @"\01L_foo"() {
+entry:
+ ret void
+}
+
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) {
+entry:
+ call void @0()
+ tail call void @"\01L_foo"()
+ ret i32 0
+}
diff --git a/test/ExecutionEngine/OrcLazy/hello.ll b/test/ExecutionEngine/OrcLazy/hello.ll
new file mode 100644
index 0000000..795224e
--- /dev/null
+++ b/test/ExecutionEngine/OrcLazy/hello.ll
@@ -0,0 +1,35 @@
+; RUN: lli -jit-kind=orc-lazy -orc-lazy-debug=funcs-to-stdout %s | FileCheck %s
+;
+; CHECK: Hello
+; CHECK: [ {{.*}}main$orc_body ]
+; CHECK: Goodbye
+
+%class.Foo = type { i8 }
+
+@f = global %class.Foo zeroinitializer, align 1
+@__dso_handle = external global i8
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_hello.cpp, i8* null }]
+@str = private unnamed_addr constant [6 x i8] c"Hello\00"
+@str2 = private unnamed_addr constant [8 x i8] c"Goodbye\00"
+
+define linkonce_odr void @_ZN3FooD1Ev(%class.Foo* nocapture readnone %this) unnamed_addr align 2 {
+entry:
+ %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @str2, i64 0, i64 0))
+ ret void
+}
+
+declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
+
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) {
+entry:
+ ret i32 0
+}
+
+define internal void @_GLOBAL__sub_I_hello.cpp() {
+entry:
+ %puts.i.i.i = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @str, i64 0, i64 0))
+ %0 = tail call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.Foo*)* @_ZN3FooD1Ev to void (i8*)*), i8* getelementptr inbounds (%class.Foo, %class.Foo* @f, i64 0, i32 0), i8* @__dso_handle)
+ ret void
+}
+
+declare i32 @puts(i8* nocapture readonly)
diff --git a/test/ExecutionEngine/OrcLazy/private_linkage.ll b/test/ExecutionEngine/OrcLazy/private_linkage.ll
new file mode 100644
index 0000000..11813dd
--- /dev/null
+++ b/test/ExecutionEngine/OrcLazy/private_linkage.ll
@@ -0,0 +1,12 @@
+; RUN: lli -jit-kind=orc-lazy %s
+
+define private void @_ZL3foov() {
+entry:
+ ret void
+}
+
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) {
+entry:
+ tail call void @_ZL3foov()
+ ret i32 0
+}
diff --git a/test/ExecutionEngine/OrcLazy/trivial_retval_1.ll b/test/ExecutionEngine/OrcLazy/trivial_retval_1.ll
deleted file mode 100644
index 701f22c..0000000
--- a/test/ExecutionEngine/OrcLazy/trivial_retval_1.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: sh -c 'lli -jit-kind=orc-lazy %s; echo $?' | FileCheck %s
-; CHECK: {{^30$}}
-define i32 @baz() {
-entry:
- ret i32 2
-}
-
-define i32 @bar() {
-entry:
- %call = call i32 @baz()
- %mul = mul nsw i32 3, %call
- ret i32 %mul
-}
-
-define i32 @foo() {
-entry:
- %call = call i32 @bar()
- %mul = mul nsw i32 5, %call
- ret i32 %mul
-}
-
-define i32 @main(i32 %argc, i8** %argv) {
-entry:
- %call = call i32 @foo()
- ret i32 %call
-}
diff --git a/test/ExecutionEngine/OrcMCJIT/2002-12-16-ArgTest.ll b/test/ExecutionEngine/OrcMCJIT/2002-12-16-ArgTest.ll
index e8eb693..825892e 100644
--- a/test/ExecutionEngine/OrcMCJIT/2002-12-16-ArgTest.ll
+++ b/test/ExecutionEngine/OrcMCJIT/2002-12-16-ArgTest.ll
@@ -13,7 +13,7 @@ declare i32 @printf(i8*, ...)
define i32 @main(i32 %argc, i8** %argv) {
bb0:
- call i32 (i8*, ...)* @printf( i8* getelementptr ([10 x i8], [10 x i8]* @.LC0, i64 0, i64 0), i32 %argc ) ; <i32>:0 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* getelementptr ([10 x i8], [10 x i8]* @.LC0, i64 0, i64 0), i32 %argc ) ; <i32>:0 [#uses=0]
%cast224 = bitcast i8** %argv to i8* ; <i8*> [#uses=1]
%local = alloca i8* ; <i8**> [#uses=3]
store i8* %cast224, i8** %local
diff --git a/test/ExecutionEngine/OrcMCJIT/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/OrcMCJIT/2008-06-05-APInt-OverAShr.ll
index 9dc3e5b..f67ae6a 100644
--- a/test/ExecutionEngine/OrcMCJIT/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/OrcMCJIT/2008-06-05-APInt-OverAShr.ll
@@ -48,7 +48,7 @@ cond_false: ; preds = %entry
cond_next: ; preds = %cond_false, %cond_true
%tmp5 = getelementptr [10 x i8], [10 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
%tmp6 = load i32, i32* %iftmp.0, align 4 ; <i32> [#uses=1]
- %tmp7 = call i32 (i8*, ...)* @printf( i8* noalias %tmp5, i32 %tmp6 ) nounwind ; <i32> [#uses=0]
+ %tmp7 = call i32 (i8*, ...) @printf( i8* noalias %tmp5, i32 %tmp6 ) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %cond_next
diff --git a/test/ExecutionEngine/OrcMCJIT/fpbitcast.ll b/test/ExecutionEngine/OrcMCJIT/fpbitcast.ll
index 767b580..d2dbe31 100644
--- a/test/ExecutionEngine/OrcMCJIT/fpbitcast.ll
+++ b/test/ExecutionEngine/OrcMCJIT/fpbitcast.ll
@@ -13,7 +13,7 @@ define i32 @main()
{
%res = call i32 @test(double 3.14)
%ptr = getelementptr [4 x i8], [4 x i8]* @format, i32 0, i32 0
- call i32 (i8*,...)* @printf(i8* %ptr, i32 %res)
+ call i32 (i8*,...) @printf(i8* %ptr, i32 %res)
ret i32 0
}
diff --git a/test/ExecutionEngine/OrcMCJIT/hello-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/hello-sm-pic.ll
deleted file mode 100644
index 59b47af..0000000
--- a/test/ExecutionEngine/OrcMCJIT/hello-sm-pic.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: %lli -jit-kind=orc-mcjit -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, i686, i386, darwin, aarch64, arm
-
-@.LC0 = internal global [12 x i8] c"Hello World\00" ; <[12 x i8]*> [#uses=1]
-
-declare i32 @puts(i8*)
-
-define i32 @main() {
- %reg210 = call i32 @puts( i8* getelementptr ([12 x i8], [12 x i8]* @.LC0, i64 0, i64 0) ) ; <i32> [#uses=0]
- ret i32 0
-}
-
diff --git a/test/ExecutionEngine/OrcMCJIT/hello2.ll b/test/ExecutionEngine/OrcMCJIT/hello2.ll
index 8f071cd..bb6a9cf 100644
--- a/test/ExecutionEngine/OrcMCJIT/hello2.ll
+++ b/test/ExecutionEngine/OrcMCJIT/hello2.ll
@@ -6,7 +6,7 @@
declare void @printf([13 x i8]*, ...)
define void @bar() {
- call void ([13 x i8]*, ...)* @printf( [13 x i8]* @msg )
+ call void ([13 x i8]*, ...) @printf( [13 x i8]* @msg )
ret void
}
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/ELF_x64-64_PIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/X86/ELF_x64-64_PIC_relocations.s
new file mode 100644
index 0000000..7598967
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/ELF_x64-64_PIC_relocations.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple=x86_64-pc-linux -relocation-model=pic -filetype=obj -o %T/test_ELF1_x86-64.o %s
+# RUN: llvm-mc -triple=x86_64-pc-linux -relocation-model=pic -filetype=obj -o %T/test_ELF2_x86-64.o %s
+# RUN: llc -mtriple=x86_64-pc-linux -relocation-model=pic -filetype=obj -o %T/test_ELF_ExternalGlobal_x86-64.o %S/Inputs/ExternalGlobal.ll
+# RUN: llvm-rtdyld -triple=x86_64-pc-linux -verify %T/test_ELF1_x86-64.o %T/test_ELF_ExternalGlobal_x86-64.o
+# Test that we can load this code twice at memory locations more than 2GB apart
+# RUN: llvm-rtdyld -triple=x86_64-pc-linux -verify -map-section test_ELF1_x86-64.o,.got=0x10000 -map-section test_ELF2_x86-64.o,.text=0x100000000 -map-section test_ELF2_x86-64.o,.got=0x100010000 %T/test_ELF1_x86-64.o %T/test_ELF2_x86-64.o %T/test_ELF_ExternalGlobal_x86-64.o
+
+# Assembly obtained by compiling the following and adding checks:
+# @G = external global i8*
+#
+# define i8* @foo() {
+# %ret = load i8** @G
+# ret i32 %ret
+# }
+#
+
+#
+ .text
+ .file "ELF_x64-64_PIC_relocations.ll"
+ .align 16, 0x90
+ .type foo,@function
+foo: # @foo
+# BB#0:
+ movq G@GOTPCREL(%rip), %rax
+ movl (%rax), %eax
+ retq
+.Ltmp0:
+ .size foo, .Ltmp0-foo
+
+
+ .section ".note.GNU-stack","",@progbits
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/Inputs/ExternalGlobal.ll b/test/ExecutionEngine/RuntimeDyld/X86/Inputs/ExternalGlobal.ll
new file mode 100644
index 0000000..51002aa
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/Inputs/ExternalGlobal.ll
@@ -0,0 +1,2 @@
+@F = global i8 0
+@G = global i8* @F
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
index 502f276..f28e4d2 100644
--- a/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
+++ b/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
@@ -31,6 +31,13 @@ insn3:
movl $0, %eax
retq
+# Test processing of the __eh_frame section.
+# rtdyld-check: *{8}(section_addr(test_x86-64.o, __eh_frame) + 0x20) = eh_frame_test - (section_addr(test_x86-64.o, __eh_frame) + 0x20)
+eh_frame_test:
+ .cfi_startproc
+ retq
+ .cfi_endproc
+
.comm y,4,2
.section __DATA,__data
diff --git a/test/ExecutionEngine/fma3-jit.ll b/test/ExecutionEngine/fma3-jit.ll
index fe62854..b68b7ae 100644
--- a/test/ExecutionEngine/fma3-jit.ll
+++ b/test/ExecutionEngine/fma3-jit.ll
@@ -10,7 +10,7 @@ define i32 @main() {
%fma = tail call double @llvm.fma.f64(double 3.0, double 3.0, double 3.0) nounwind readnone
%ptr1 = getelementptr [4 x i8], [4 x i8]* @msg_double, i32 0, i32 0
- call i32 (i8*,...)* @printf(i8* %ptr1, double %fma)
+ call i32 (i8*,...) @printf(i8* %ptr1, double %fma)
ret i32 0
}
diff --git a/test/ExecutionEngine/frem.ll b/test/ExecutionEngine/frem.ll
index 2276677..aedaae3 100644
--- a/test/ExecutionEngine/frem.ll
+++ b/test/ExecutionEngine/frem.ll
@@ -14,7 +14,7 @@ define i32 @main() {
%flt = load float, float* @flt
%float2 = frem float %flt, 5.0
%double1 = fpext float %float2 to double
- call i32 (i8*, ...)* @printf(i8* getelementptr ([18 x i8], [18 x i8]* @str, i32 0, i64 0), double %double1)
+ call i32 (i8*, ...) @printf(i8* getelementptr ([18 x i8], [18 x i8]* @str, i32 0, i64 0), double %double1)
call i32 @fflush(i8* null)
ret i32 0
}
diff --git a/test/ExecutionEngine/test-interp-vec-loadstore.ll b/test/ExecutionEngine/test-interp-vec-loadstore.ll
index 6819724..b66a935 100644
--- a/test/ExecutionEngine/test-interp-vec-loadstore.ll
+++ b/test/ExecutionEngine/test-interp-vec-loadstore.ll
@@ -73,7 +73,7 @@ define i32 @main() {
br i1 %res_i, label %Print_int, label %Double
Print_int:
%ptr0 = getelementptr [17 x i8], [17 x i8]* @msg_int, i32 0, i32 0
- call i32 (i8*,...)* @printf(i8* %ptr0)
+ call i32 (i8*,...) @printf(i8* %ptr0)
br label %Double
Double:
store <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, <4 x double>* %b, align 16
@@ -117,7 +117,7 @@ Double:
br i1 %res_double, label %Print_double, label %Float
Print_double:
%ptr1 = getelementptr [20 x i8], [20 x i8]* @msg_double, i32 0, i32 0
- call i32 (i8*,...)* @printf(i8* %ptr1)
+ call i32 (i8*,...) @printf(i8* %ptr1)
br label %Float
Float:
store <4 x float> <float 9.0, float 10.0, float 11.0, float 12.0>, <4 x float>* %c, align 16
@@ -161,7 +161,7 @@ Float:
br i1 %res_float, label %Print_float, label %Exit
Print_float:
%ptr2 = getelementptr [19 x i8], [19 x i8]* @msg_float, i32 0, i32 0
- call i32 (i8*,...)* @printf(i8* %ptr2)
+ call i32 (i8*,...) @printf(i8* %ptr2)
br label %Exit
Exit:
diff --git a/test/Feature/aliases.ll b/test/Feature/aliases.ll
index f03e3cf..b3b9ceb 100644
--- a/test/Feature/aliases.ll
+++ b/test/Feature/aliases.ll
@@ -35,7 +35,7 @@ entry:
%tmp0 = load i32, i32* @bar_i
%tmp2 = call i32 @foo_f()
%tmp3 = add i32 %tmp, %tmp2
- %tmp4 = call %FunTy* @bar_f()
+ %tmp4 = call %FunTy @bar_f()
%tmp5 = add i32 %tmp3, %tmp4
%tmp6 = add i32 %tmp1, %tmp5
%tmp7 = add i32 %tmp6, %tmp0
diff --git a/test/Feature/attributes.ll b/test/Feature/attributes.ll
index 0392d86..cae3cbf 100644
--- a/test/Feature/attributes.ll
+++ b/test/Feature/attributes.ll
@@ -6,7 +6,7 @@
define void @foo() #0 {
entry:
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i32 0, i32 0))
ret void
}
diff --git a/test/Feature/paramattrs.ll b/test/Feature/paramattrs.ll
index 9860f5a..d24791e 100644
--- a/test/Feature/paramattrs.ll
+++ b/test/Feature/paramattrs.ll
@@ -14,7 +14,7 @@ declare void @exit(i32) noreturn nounwind
define i32 @main(i32 inreg %argc, i8 ** inreg %argv) nounwind {
%val = trunc i32 %argc to i16
- %res1 = call signext i16 (i16 ) *@test(i16 signext %val)
+ %res1 = call signext i16 (i16 )@test(i16 signext %val)
%two = add i16 %res1, %res1
%res2 = call zeroext i8 @test2(i16 zeroext %two )
%retVal = sext i16 %two to i32
diff --git a/test/Feature/testvarargs.ll b/test/Feature/testvarargs.ll
index a73b7ec..ccc81f6 100644
--- a/test/Feature/testvarargs.ll
+++ b/test/Feature/testvarargs.ll
@@ -6,7 +6,7 @@
declare i32 @printf(i8*, ...) ;; Prototype for: int __builtin_printf(const char*, ...)
define i32 @testvarar() {
- call i32 (i8*, ...)* @printf( i8* null, i32 12, i8 42 ) ; <i32>:1 [#uses=1]
+ call i32 (i8*, ...) @printf( i8* null, i32 12, i8 42 ) ; <i32>:1 [#uses=1]
ret i32 %1
}
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll b/test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll
new file mode 100644
index 0000000..cfa91d4
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -asan -S -o %t.ll
+; RUN: FileCheck %s < %t.ll
+; RUN: llc < %t.ll | FileCheck %s --check-prefix=ASM
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i386-pc-windows-msvc"
+
+define void @MyCPUID(i32 %fxn, i32* %out) sanitize_address {
+ %fxn.ptr = alloca i32
+ %a.ptr = alloca i32
+ %b.ptr = alloca i32
+ %c.ptr = alloca i32
+ %d.ptr = alloca i32
+ store i32 %fxn, i32* %fxn.ptr
+ call void asm sideeffect inteldialect "xchg ebx, esi\0A\09mov eax, dword ptr $4\0A\09cpuid\0A\09mov dword ptr $0, eax\0A\09mov dword ptr $1, ebx\0A\09mov dword ptr $2, ecx\0A\09mov dword ptr $3, edx\0A\09xchg ebx, esi", "=*m,=*m,=*m,=*m,*m,~{eax},~{ebx},~{ecx},~{edx},~{esi},~{dirflag},~{fpsr},~{flags}"(i32* %a.ptr, i32* %b.ptr, i32* %c.ptr, i32* %d.ptr, i32* %fxn.ptr)
+
+ %a = load i32, i32* %a.ptr
+ %a.out = getelementptr inbounds i32, i32* %out, i32 0
+ store i32 %a, i32* %a.out
+
+ %b = load i32, i32* %b.ptr
+ %b.out = getelementptr inbounds i32, i32* %out, i32 1
+ store i32 %b, i32* %b.out
+
+ %c = load i32, i32* %c.ptr
+ %c.out = getelementptr inbounds i32, i32* %out, i32 2
+ store i32 %c, i32* %c.out
+
+ %d = load i32, i32* %d.ptr
+ %d.out = getelementptr inbounds i32, i32* %out, i32 3
+ store i32 %d, i32* %d.out
+
+ ret void
+}
+
+; We used to introduce stack mallocs for UAR detection, but that makes LLVM run
+; out of registers on 32-bit platforms. Therefore, we don't do stack malloc on
+; such functions.
+
+; CHECK-LABEL: define void @MyCPUID(i32 %fxn, i32* %out)
+; CHECK: %MyAlloca = alloca [96 x i8], align 32
+; CHECK-NOT: call {{.*}} @__asan_stack_malloc
+
+; The code generator should recognize that all operands are just stack memory.
+; This is important with MS inline asm where operand lists are implicit and all
+; local variables can be referenced freely.
+
+; ASM-LABEL: MyCPUID:
+; ASM: cpuid
+; ASM-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; ASM-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; ASM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; ASM-NEXT: movl %edx, {{[0-9]+}}(%esp)
diff --git a/test/Instrumentation/AddressSanitizer/debug_info.ll b/test/Instrumentation/AddressSanitizer/debug_info.ll
index da18b4b..8670344 100644
--- a/test/Instrumentation/AddressSanitizer/debug_info.ll
+++ b/test/Instrumentation/AddressSanitizer/debug_info.ll
@@ -34,7 +34,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.module.flags = !{!17}
!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 169314)", isOptimized: true, emissionKind: 0, file: !16, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
-!1 = !{i32 0}
+!1 = !{}
!3 = !{!5}
!5 = !MDSubprogram(name: "zzz", linkageName: "_Z3zzzi", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !16, scope: !6, type: !7, function: i32 (i32)* @_Z3zzzi, variables: !1)
!6 = !MDFile(filename: "a.cc", directory: "/usr/local/google/llvm_cmake_clang/tmp/debuginfo")
diff --git a/test/Instrumentation/DataFlowSanitizer/abilist.ll b/test/Instrumentation/DataFlowSanitizer/abilist.ll
index 7998513..90103f8 100644
--- a/test/Instrumentation/DataFlowSanitizer/abilist.ll
+++ b/test/Instrumentation/DataFlowSanitizer/abilist.ll
@@ -61,13 +61,13 @@ define void @f(i32 %x) {
; CHECK: %[[LABELVA1_1:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA1]], i32 0, i32 1
; CHECK: store i16 %{{.*}}, i16* %[[LABELVA1_1]]
; CHECK: %[[LABELVA1_0A:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA1]], i32 0, i32 0
- ; CHECK: call void (i32, i16, i16*, ...)* @__dfsw_custom3(i32 1, i16 0, i16* %[[LABELVA1_0A]], i32 2, i32 %{{.*}})
- call void (i32, ...)* @custom3(i32 1, i32 2, i32 %x)
+ ; CHECK: call void (i32, i16, i16*, ...) @__dfsw_custom3(i32 1, i16 0, i16* %[[LABELVA1_0A]], i32 2, i32 %{{.*}})
+ call void (i32, ...) @custom3(i32 1, i32 2, i32 %x)
; CHECK: %[[LABELVA2_0:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA2]], i32 0, i32 0
; CHECK: %[[LABELVA2_0A:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA2]], i32 0, i32 0
- ; CHECK: call i32 (i32, i16, i16*, i16*, ...)* @__dfsw_custom4(i32 1, i16 0, i16* %[[LABELVA2_0A]], i16* %[[LABELRETURN]], i32 2, i32 3)
- call i32 (i32, ...)* @custom4(i32 1, i32 2, i32 3)
+ ; CHECK: call i32 (i32, i16, i16*, i16*, ...) @__dfsw_custom4(i32 1, i16 0, i16* %[[LABELVA2_0A]], i16* %[[LABELRETURN]], i32 2, i32 3)
+ call i32 (i32, ...) @custom4(i32 1, i32 2, i32 3)
ret void
}
diff --git a/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll b/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll
index fb1cdbb..855125a 100644
--- a/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll
+++ b/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll
@@ -16,7 +16,7 @@ entry:
br i1 %tobool, label %if.end, label %if.then
if.then: ; preds = %entry
- tail call void (...)* @foo() nounwind
+ tail call void (...) @foo() nounwind
br label %if.end
if.end: ; preds = %entry, %if.then
diff --git a/test/Instrumentation/MemorySanitizer/msan_basic.ll b/test/Instrumentation/MemorySanitizer/msan_basic.ll
index 7472559..4e84aef 100644
--- a/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -75,7 +75,7 @@ entry:
br i1 %tobool, label %if.end, label %if.then
if.then: ; preds = %entry
- tail call void (...)* @foo() nounwind
+ tail call void (...) @foo() nounwind
br label %if.end
if.end: ; preds = %entry, %if.then
@@ -841,7 +841,7 @@ entry:
%agg.tmp.sroa.2.0.copyload = load i64, i64* %agg.tmp.sroa.2.0..sroa_cast, align 4
%1 = bitcast %struct.StructByVal* %agg.tmp2 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %0, i64 16, i32 4, i1 false)
- call void (i32, ...)* @VAArgStructFn(i32 undef, i64 %agg.tmp.sroa.0.0.copyload, i64 %agg.tmp.sroa.2.0.copyload, i64 %agg.tmp.sroa.0.0.copyload, i64 %agg.tmp.sroa.2.0.copyload, %struct.StructByVal* byval align 8 %agg.tmp2)
+ call void (i32, ...) @VAArgStructFn(i32 undef, i64 %agg.tmp.sroa.0.0.copyload, i64 %agg.tmp.sroa.2.0.copyload, i64 %agg.tmp.sroa.0.0.copyload, i64 %agg.tmp.sroa.2.0.copyload, %struct.StructByVal* byval align 8 %agg.tmp2)
ret void
}
@@ -862,7 +862,7 @@ entry:
; CHECK: bitcast { i32, i32, i32, i32 }* {{.*}}@__msan_va_arg_tls {{.*}}, i64 176
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
; CHECK: store i64 16, i64* @__msan_va_arg_overflow_size_tls
-; CHECK: call void (i32, ...)* @VAArgStructFn
+; CHECK: call void (i32, ...) @VAArgStructFn
; CHECK: ret void
declare i32 @InnerTailCall(i32 %a)
diff --git a/test/Integer/2007-01-19-TruncSext.ll b/test/Integer/2007-01-19-TruncSext.ll
index a487eb2..d824871 100644
--- a/test/Integer/2007-01-19-TruncSext.ll
+++ b/test/Integer/2007-01-19-TruncSext.ll
@@ -24,7 +24,7 @@ define i32 @main(i32 %argc, i8** %argv) {
%X = load i17, i17* %P
%result = sext i17 %X to i32
%fmt = getelementptr [4 x i8], [4 x i8]* @FORMAT, i32 0, i32 0
- call i32 (i8*,...)* @printf(i8* %fmt, i32 %result)
+ call i32 (i8*,...) @printf(i8* %fmt, i32 %result)
ret i32 0
}
diff --git a/test/LTO/X86/keep-used-puts-during-instcombine.ll b/test/LTO/X86/keep-used-puts-during-instcombine.ll
index 74f577d..2624947 100644
--- a/test/LTO/X86/keep-used-puts-during-instcombine.ll
+++ b/test/LTO/X86/keep-used-puts-during-instcombine.ll
@@ -16,7 +16,7 @@ target triple = "x86_64-apple-darwin11"
define i32 @uses_printf(i32 %i) {
entry:
%s = getelementptr [13 x i8], [13 x i8]* @str, i64 0, i64 0
- call i32 (i8*, ...)* @printf(i8* %s)
+ call i32 (i8*, ...) @printf(i8* %s)
ret i32 0
}
diff --git a/test/LTO/X86/no-undefined-puts-when-implemented.ll b/test/LTO/X86/no-undefined-puts-when-implemented.ll
index 7054609..132ec67 100644
--- a/test/LTO/X86/no-undefined-puts-when-implemented.ll
+++ b/test/LTO/X86/no-undefined-puts-when-implemented.ll
@@ -21,7 +21,7 @@ entry:
define i32 @uses_printf(i32 %i) {
entry:
%s = getelementptr [13 x i8], [13 x i8]* @str, i64 0, i64 0
- call i32 (i8*, ...)* @printf(i8* %s)
+ call i32 (i8*, ...) @printf(i8* %s)
ret i32 0
}
diff --git a/test/Linker/2011-08-04-DebugLoc.ll b/test/Linker/2011-08-04-DebugLoc.ll
index 18fa7da..25d08cd 100644
--- a/test/Linker/2011-08-04-DebugLoc.ll
+++ b/test/Linker/2011-08-04-DebugLoc.ll
@@ -26,6 +26,6 @@ define i32 @foo() nounwind ssp {
!6 = !MDLocation(line: 2, column: 13, scope: !7)
!7 = distinct !MDLexicalBlock(line: 2, column: 11, file: !8, scope: !1)
!8 = !MDFile(filename: "a.c", directory: "/private/tmp")
-!9 = !{i32 0}
+!9 = !{}
!10 = !{!1}
!11 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Linker/2011-08-04-DebugLoc2.ll b/test/Linker/2011-08-04-DebugLoc2.ll
index 93eb61d..76e73dd 100644
--- a/test/Linker/2011-08-04-DebugLoc2.ll
+++ b/test/Linker/2011-08-04-DebugLoc2.ll
@@ -23,6 +23,6 @@ define i32 @bar() nounwind ssp {
!6 = !MDLocation(line: 1, column: 13, scope: !7)
!7 = distinct !MDLexicalBlock(line: 1, column: 11, file: !8, scope: !1)
!8 = !MDFile(filename: "b.c", directory: "/private/tmp")
-!9 = !{i32 0}
+!9 = !{}
!10 = !{!1}
!11 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Linker/2011-08-04-Metadata.ll b/test/Linker/2011-08-04-Metadata.ll
index 2a1711a..7b39800 100644
--- a/test/Linker/2011-08-04-Metadata.ll
+++ b/test/Linker/2011-08-04-Metadata.ll
@@ -24,7 +24,7 @@ entry:
!llvm.dbg.sp = !{!1}
!llvm.dbg.gv = !{!5}
-!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !9, enums: !4, retainedTypes: !4, subprograms: !10)
+!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !9, enums: !{}, retainedTypes: !{}, subprograms: !10)
!1 = !MDSubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !2, type: !3, function: void ()* @foo)
!2 = !MDFile(filename: "/tmp/one.c", directory: "/Volumes/Lalgate/Slate/D")
!3 = !MDSubroutineType(types: !4)
diff --git a/test/Linker/2011-08-04-Metadata2.ll b/test/Linker/2011-08-04-Metadata2.ll
index e62c122..ba8c6db 100644
--- a/test/Linker/2011-08-04-Metadata2.ll
+++ b/test/Linker/2011-08-04-Metadata2.ll
@@ -19,7 +19,7 @@ entry:
!llvm.dbg.sp = !{!1}
!llvm.dbg.gv = !{!5}
-!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !9, enums: !4, retainedTypes: !4, subprograms: !10)
+!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !9, enums: !{}, retainedTypes: !{}, subprograms: !10)
!1 = !MDSubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !2, type: !3, function: void ()* @bar)
!2 = !MDFile(filename: "/tmp/two.c", directory: "/Volumes/Lalgate/Slate/D")
!3 = !MDSubroutineType(types: !4)
diff --git a/test/Linker/2011-08-18-unique-class-type.ll b/test/Linker/2011-08-18-unique-class-type.ll
index 4ffb249..ee62e40 100644
--- a/test/Linker/2011-08-18-unique-class-type.ll
+++ b/test/Linker/2011-08-18-unique-class-type.ll
@@ -22,7 +22,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 137954)", isOptimized: true, emissionKind: 0, file: !16, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
!1 = !{!2}
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!5}
!5 = !MDSubprogram(name: "foo", linkageName: "_Z3fooN2N11AE", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !16, scope: !6, type: !7, function: void ()* @_Z3fooN2N11AE)
!6 = !MDFile(filename: "n1.c", directory: "/private/tmp")
diff --git a/test/Linker/2011-08-18-unique-class-type2.ll b/test/Linker/2011-08-18-unique-class-type2.ll
index da75160..a603aee 100644
--- a/test/Linker/2011-08-18-unique-class-type2.ll
+++ b/test/Linker/2011-08-18-unique-class-type2.ll
@@ -20,7 +20,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 137954)", isOptimized: true, emissionKind: 0, file: !16, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
!1 = !{!2}
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!5}
!5 = !MDSubprogram(name: "bar", linkageName: "_Z3barN2N11AE", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scope: !6, type: !7, function: void ()* @_Z3barN2N11AE)
!6 = !MDFile(filename: "n2.c", directory: "/private/tmp")
diff --git a/test/Linker/2011-08-18-unique-debug-type.ll b/test/Linker/2011-08-18-unique-debug-type.ll
index 7184d52..a8ccd27 100644
--- a/test/Linker/2011-08-18-unique-debug-type.ll
+++ b/test/Linker/2011-08-18-unique-debug-type.ll
@@ -14,7 +14,7 @@ entry:
!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 137954)", isOptimized: true, emissionKind: 0, file: !12, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
!1 = !{!2}
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!5}
!5 = !MDSubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: !6, type: !7, function: i32 ()* @foo)
!6 = !MDFile(filename: "one.c", directory: "/private/tmp")
diff --git a/test/Linker/2011-08-18-unique-debug-type2.ll b/test/Linker/2011-08-18-unique-debug-type2.ll
index 2b04560..242f953 100644
--- a/test/Linker/2011-08-18-unique-debug-type2.ll
+++ b/test/Linker/2011-08-18-unique-debug-type2.ll
@@ -14,7 +14,7 @@ entry:
!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 137954)", isOptimized: true, emissionKind: 0, file: !12, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
!1 = !{!2}
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!5}
!5 = !MDSubprogram(name: "bar", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: !6, type: !7, function: i32 ()* @bar)
!6 = !MDFile(filename: "two.c", directory: "/private/tmp")
diff --git a/test/Linker/DbgDeclare.ll b/test/Linker/DbgDeclare.ll
index 9afb8fb..6a9239f 100644
--- a/test/Linker/DbgDeclare.ll
+++ b/test/Linker/DbgDeclare.ll
@@ -38,10 +38,9 @@ declare void @test(i32, i8**)
!llvm.module.flags = !{!21}
!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 173515)", isOptimized: true, emissionKind: 0, file: !20, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
-!1 = !{!2}
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!5}
-!5 = !MDSubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !20, scope: null, type: !7, function: i32 (i32, i8**)* @main, variables: !1)
+!5 = !MDSubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !20, scope: null, type: !7, function: i32 (i32, i8**)* @main, variables: !2)
!6 = !MDFile(filename: "main.cpp", directory: "/private/tmp")
!7 = !MDSubroutineType(types: !8)
!8 = !{!9, !9, !10}
diff --git a/test/Linker/DbgDeclare2.ll b/test/Linker/DbgDeclare2.ll
index 107d8c1..2afe0c2 100644
--- a/test/Linker/DbgDeclare2.ll
+++ b/test/Linker/DbgDeclare2.ll
@@ -51,10 +51,9 @@ declare i32 @puts(i8*)
!llvm.module.flags = !{!27}
!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 173515)", isOptimized: true, emissionKind: 0, file: !25, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
-!1 = !{!2}
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!5}
-!5 = !MDSubprogram(name: "print_args", linkageName: "test", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !26, scope: null, type: !7, function: void (i32, i8**)* @test, variables: !1)
+!5 = !MDSubprogram(name: "print_args", linkageName: "test", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !26, scope: null, type: !7, function: void (i32, i8**)* @test, variables: !2)
!6 = !MDFile(filename: "test.cpp", directory: "/private/tmp")
!7 = !MDSubroutineType(types: !8)
!8 = !{null, !9, !10}
diff --git a/test/Linker/Inputs/basiclink.b.ll b/test/Linker/Inputs/basiclink.b.ll
index 0d2abc7..407aa94 100644
--- a/test/Linker/Inputs/basiclink.b.ll
+++ b/test/Linker/Inputs/basiclink.b.ll
@@ -1,6 +1,6 @@
declare i32* @foo(...)
define i32* @bar() {
- %ret = call i32* (...)* @foo( i32 123 )
+ %ret = call i32* (...) @foo( i32 123 )
ret i32* %ret
}
@baz = global i32 0
diff --git a/test/Linker/Inputs/drop-debug.bc b/test/Linker/Inputs/drop-debug.bc
new file mode 100644
index 0000000..f9c471f
--- /dev/null
+++ b/test/Linker/Inputs/drop-debug.bc
Binary files differ
diff --git a/test/Linker/Inputs/type-unique-inheritance-a.ll b/test/Linker/Inputs/type-unique-inheritance-a.ll
index 2159f98..82f9d55 100644
--- a/test/Linker/Inputs/type-unique-inheritance-a.ll
+++ b/test/Linker/Inputs/type-unique-inheritance-a.ll
@@ -68,7 +68,7 @@ attributes #1 = { nounwind readnone }
!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
!1 = !MDFile(filename: "foo.cpp", directory: "/Users/mren/c_testing/type_unique_air/inher")
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!4, !8}
!4 = !MDCompositeType(tag: DW_TAG_class_type, name: "A", line: 3, size: 64, align: 32, file: !5, elements: !6, identifier: "_ZTS1A")
!5 = !MDFile(filename: "./a.hpp", directory: "/Users/mren/c_testing/type_unique_air/inher")
diff --git a/test/Linker/Inputs/type-unique-inheritance-b.ll b/test/Linker/Inputs/type-unique-inheritance-b.ll
index f16ae1a..2a27d7c 100644
--- a/test/Linker/Inputs/type-unique-inheritance-b.ll
+++ b/test/Linker/Inputs/type-unique-inheritance-b.ll
@@ -42,7 +42,7 @@ attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !19, globals: !2, imports: !2)
!1 = !MDFile(filename: "bar.cpp", directory: "/Users/mren/c_testing/type_unique_air/inher")
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!4, !11, !15}
!4 = !MDCompositeType(tag: DW_TAG_class_type, name: "B", line: 7, size: 128, align: 64, file: !5, elements: !6, identifier: "_ZTS1B")
!5 = !MDFile(filename: "./b.hpp", directory: "/Users/mren/c_testing/type_unique_air/inher")
diff --git a/test/Linker/broken.ll b/test/Linker/broken.ll
index 2276401..8aefe4e 100644
--- a/test/Linker/broken.ll
+++ b/test/Linker/broken.ll
@@ -1,6 +1,6 @@
; RUN: not llvm-link -o /dev/null %s 2>&1 | FileCheck %s
-; CHECK: input module '{{.*}}broken.ll' is broken
+; CHECK: broken.ll: error: input module is broken!
define i32 @foo(i32 %v) {
%first = add i32 %v, %second
%second = add i32 %v, 3
diff --git a/test/Linker/drop-debug.ll b/test/Linker/drop-debug.ll
new file mode 100644
index 0000000..9c1072a
--- /dev/null
+++ b/test/Linker/drop-debug.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-link %p/Inputs/drop-debug.bc -o %t 2>&1 | FileCheck %s
+
+;; drop-debug.bc was created from "void f(void) {}" with clang 3.5 and
+; -gline-tables-only, so it contains old debug info.
+
+; CHECK: warning: ignoring debug info with an invalid version (1) in {{.*}}/Inputs/drop-debug.bc
diff --git a/test/Linker/subprogram-linkonce-weak-odr.ll b/test/Linker/subprogram-linkonce-weak-odr.ll
index 97058fd..52ce679 100644
--- a/test/Linker/subprogram-linkonce-weak-odr.ll
+++ b/test/Linker/subprogram-linkonce-weak-odr.ll
@@ -113,13 +113,13 @@ entry:
; DWLW-NOT: DW_AT_low_pc
; DWLW-NOT: DW_AT_high_pc
; DWLW: DW_AT_name {{.*}}foo
-; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir/foo.h"
+; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}foo.h"
; DWLW: DW_AT_decl_line {{.*}}(1)
; DWLW: DW_TAG_subprogram
; DWLW: DW_AT_low_pc
; DWLW: DW_AT_high_pc
; DWLW: DW_AT_name {{.*}}bar
-; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir/bar.c"
+; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}bar.c"
; DWLW: DW_AT_decl_line {{.*}}(11)
; DWLW: DW_TAG_inlined_subroutine
; DWLW: DW_AT_abstract_origin
@@ -129,7 +129,7 @@ entry:
; DWLW: DW_AT_low_pc
; DWLW: DW_AT_high_pc
; DWLW: DW_AT_name {{.*}}foo
-; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir/foo.h"
+; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}foo.h"
; DWLW: DW_AT_decl_line {{.*}}(1)
; The DWARF output is already symmetric (just reordered).
@@ -139,7 +139,7 @@ entry:
; DWWL: DW_AT_low_pc
; DWWL: DW_AT_high_pc
; DWWL: DW_AT_name {{.*}}foo
-; DWWL: DW_AT_decl_file {{.*}}"/path/to/dir/foo.h"
+; DWWL: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}foo.h"
; DWWL: DW_AT_decl_line {{.*}}(1)
; DWWL: DW_TAG_compile_unit
; DWWL: DW_AT_name {{.*}}"bar.c"
@@ -154,7 +154,7 @@ entry:
; DWWL: DW_AT_high_pc
; DWWL-NOT: DW_AT_name {{.*}}foo
; DWWL: DW_AT_name {{.*}}bar
-; DWWL: DW_AT_decl_file {{.*}}"/path/to/dir/bar.c"
+; DWWL: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}bar.c"
; DWWL: DW_AT_decl_line {{.*}}(11)
; DWWL: DW_TAG_inlined_subroutine
; DWWL: DW_AT_abstract_origin
diff --git a/test/Linker/subprogram-linkonce-weak.ll b/test/Linker/subprogram-linkonce-weak.ll
index 42a8b39..94d4365 100644
--- a/test/Linker/subprogram-linkonce-weak.ll
+++ b/test/Linker/subprogram-linkonce-weak.ll
@@ -109,13 +109,13 @@ entry:
; DWLW-NOT: DW_AT_low_pc
; DWLW-NOT: DW_AT_high_pc
; DWLW: DW_AT_name {{.*}}foo
-; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir/bar.c"
+; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}bar.c"
; DWLW: DW_AT_decl_line {{.*}}(1)
; DWLW: DW_TAG_subprogram
; DWLW: DW_AT_low_pc
; DWLW: DW_AT_high_pc
; DWLW: DW_AT_name {{.*}}bar
-; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir/bar.c"
+; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}bar.c"
; DWLW: DW_AT_decl_line {{.*}}(11)
; DWLW: DW_TAG_inlined_subroutine
@@ -126,7 +126,7 @@ entry:
; DWLW: DW_AT_low_pc
; DWLW: DW_AT_high_pc
; DWLW: DW_AT_name {{.*}}foo
-; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir/foo.c"
+; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}foo.c"
; DWLW: DW_AT_decl_line {{.*}}(51)
; The DWARF output is already symmetric (just reordered).
@@ -136,7 +136,7 @@ entry:
; DWWL: DW_AT_low_pc
; DWWL: DW_AT_high_pc
; DWWL: DW_AT_name {{.*}}foo
-; DWWL: DW_AT_decl_file {{.*}}"/path/to/dir/foo.c"
+; DWWL: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}foo.c"
; DWWL: DW_AT_decl_line {{.*}}(51)
; DWWL: DW_TAG_compile_unit
; DWWL: DW_AT_name {{.*}}"bar.c"
@@ -144,13 +144,13 @@ entry:
; DWWL-NOT: DW_AT_low_pc
; DWWL-NOT: DW_AT_high_pc
; DWWL: DW_AT_name {{.*}}foo
-; DWWL: DW_AT_decl_file {{.*}}"/path/to/dir/bar.c"
+; DWWL: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}bar.c"
; DWWL: DW_AT_decl_line {{.*}}(1)
; DWWL: DW_TAG_subprogram
; DWWL: DW_AT_low_pc
; DWWL: DW_AT_high_pc
; DWWL: DW_AT_name {{.*}}bar
-; DWWL: DW_AT_decl_file {{.*}}"/path/to/dir/bar.c"
+; DWWL: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}bar.c"
; DWWL: DW_AT_decl_line {{.*}}(11)
; DWWL: DW_TAG_inlined_subroutine
; DWWL: DW_AT_abstract_origin
diff --git a/test/Linker/type-unique-odr-a.ll b/test/Linker/type-unique-odr-a.ll
index 2321c01..a446547 100644
--- a/test/Linker/type-unique-odr-a.ll
+++ b/test/Linker/type-unique-odr-a.ll
@@ -82,11 +82,10 @@ attributes #1 = { nounwind readnone }
!6 = !{!7, !9}
!7 = !MDDerivedType(tag: DW_TAG_member, name: "data", line: 2, size: 32, align: 32, flags: DIFlagPrivate, file: !5, scope: !"_ZTS1A", baseType: !8)
!8 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !MDSubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 4, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagProtected | DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !"_ZTS1A", type: !10, variables: !13)
+!9 = !MDSubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 4, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagProtected | DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !"_ZTS1A", type: !10)
!10 = !MDSubroutineType(types: !11)
!11 = !{null, !12}
!12 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
-!13 = !{i32 786468}
!14 = !{!15, !19}
!15 = !MDSubprogram(name: "baz", linkageName: "_Z3bazv", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !5, scope: !16, type: !17, function: void ()* @_Z3bazv, variables: !2)
!16 = !MDFile(filename: "type-unique-odr-a.cpp", directory: "")
diff --git a/test/Linker/type-unique-odr-b.ll b/test/Linker/type-unique-odr-b.ll
index 7123619..c7a48aa 100644
--- a/test/Linker/type-unique-odr-b.ll
+++ b/test/Linker/type-unique-odr-b.ll
@@ -63,11 +63,10 @@ attributes #1 = { nounwind readnone }
!6 = !{!7, !9}
!7 = !MDDerivedType(tag: DW_TAG_member, name: "data", line: 3, size: 32, align: 32, flags: DIFlagPrivate, file: !5, scope: !"_ZTS1A", baseType: !8)
!8 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !MDSubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagProtected | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !5, scope: !"_ZTS1A", type: !10, variables: !13)
+!9 = !MDSubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagProtected | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !5, scope: !"_ZTS1A", type: !10)
!10 = !MDSubroutineType(types: !11)
!11 = !{null, !12}
!12 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
-!13 = !{i32 786468}
!14 = !{!15, !16, !20}
!15 = !MDSubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !5, scope: !"_ZTS1A", type: !10, function: void (%class.A*)* @_ZN1A6getFooEv, declaration: !9, variables: !2)
!16 = !MDSubprogram(name: "f", linkageName: "_Z1fv", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !5, scope: !17, type: !18, function: void ()* @_Z1fv, variables: !2)
diff --git a/test/Linker/type-unique-simple2-a.ll b/test/Linker/type-unique-simple2-a.ll
index ed53601..8a1cc20 100644
--- a/test/Linker/type-unique-simple2-a.ll
+++ b/test/Linker/type-unique-simple2-a.ll
@@ -94,26 +94,23 @@ attributes #4 = { nounwind readnone }
!11 = !MDSubroutineType(types: !12)
!12 = !{!13}
!13 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!14 = !MDSubprogram(name: "setFoo", linkageName: "_ZN1A6setFooEv", line: 4, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !"_ZTS1A", type: !15, containingType: !"_ZTS1A", variables: !18)
+!14 = !MDSubprogram(name: "setFoo", linkageName: "_ZN1A6setFooEv", line: 4, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !"_ZTS1A", type: !15, containingType: !"_ZTS1A")
!15 = !MDSubroutineType(types: !16)
!16 = !{null, !17}
!17 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
-!18 = !{i32 786468}
-!19 = !MDSubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 5, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !5, scope: !"_ZTS1A", type: !20, containingType: !"_ZTS1A", variables: !25)
+!19 = !MDSubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 5, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !5, scope: !"_ZTS1A", type: !20, containingType: !"_ZTS1A")
!20 = !MDSubroutineType(types: !21)
!21 = !{!22, !17}
!22 = !MDDerivedType(tag: DW_TAG_const_type, baseType: !23)
!23 = !MDDerivedType(tag: DW_TAG_typedef, name: "foo_t", line: 1, file: !24, baseType: !13)
!24 = !MDFile(filename: "a.cpp", directory: "")
-!25 = !{i32 786468}
!26 = !{!27, !31, !34}
!27 = !MDSubprogram(name: "bar", linkageName: "_Z3barv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !24, scope: !28, type: !29, function: i32 ()* @_Z3barv, variables: !2)
!28 = !MDFile(filename: "a.cpp", directory: "")
!29 = !MDSubroutineType(types: !30)
!30 = !{!23}
!31 = !MDSubprogram(name: "A", linkageName: "_ZN1AC1Ev", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !"_ZTS1A", type: !15, function: void (%class.A*)* @_ZN1AC1Ev, declaration: !32, variables: !2)
-!32 = !MDSubprogram(name: "A", isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scope: !"_ZTS1A", type: !15, variables: !33)
-!33 = !{i32 786468}
+!32 = !MDSubprogram(name: "A", isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scope: !"_ZTS1A", type: !15)
!34 = !MDSubprogram(name: "A", linkageName: "_ZN1AC2Ev", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !"_ZTS1A", type: !15, function: void (%class.A*)* @_ZN1AC2Ev, declaration: !32, variables: !2)
!35 = !{i32 2, !"Dwarf Version", i32 2}
!36 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Linker/type-unique-simple2-b.ll b/test/Linker/type-unique-simple2-b.ll
index 884995b..1679405 100644
--- a/test/Linker/type-unique-simple2-b.ll
+++ b/test/Linker/type-unique-simple2-b.ll
@@ -61,17 +61,15 @@ attributes #1 = { nounwind readnone }
!11 = !MDSubroutineType(types: !12)
!12 = !{!13}
!13 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!14 = !MDSubprogram(name: "setFoo", linkageName: "_ZN1A6setFooEv", line: 4, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !"_ZTS1A", type: !15, containingType: !"_ZTS1A", variables: !18)
+!14 = !MDSubprogram(name: "setFoo", linkageName: "_ZN1A6setFooEv", line: 4, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !"_ZTS1A", type: !15, containingType: !"_ZTS1A")
!15 = !MDSubroutineType(types: !16)
!16 = !{null, !17}
!17 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
-!18 = !{i32 786468}
-!19 = !MDSubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 5, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !5, scope: !"_ZTS1A", type: !20, containingType: !"_ZTS1A", variables: !24)
+!19 = !MDSubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 5, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !5, scope: !"_ZTS1A", type: !20, containingType: !"_ZTS1A")
!20 = !MDSubroutineType(types: !21)
!21 = !{!22, !17}
!22 = !MDDerivedType(tag: DW_TAG_const_type, baseType: !23)
!23 = !MDDerivedType(tag: DW_TAG_typedef, name: "foo_t", line: 1, file: !5, baseType: !13)
-!24 = !{i32 786468}
!25 = !{!26, !28}
!26 = !MDSubprogram(name: "setFoo", linkageName: "_ZN1A6setFooEv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !27, scope: !"_ZTS1A", type: !15, function: void (%class.A*)* @_ZN1A6setFooEv, declaration: !14, variables: !2)
!27 = !MDFile(filename: "b.cpp", directory: "")
diff --git a/test/MC/AArch64/arm64-elf-reloc-condbr.s b/test/MC/AArch64/arm64-elf-reloc-condbr.s
index 9b70a20..3182045 100644
--- a/test/MC/AArch64/arm64-elf-reloc-condbr.s
+++ b/test/MC/AArch64/arm64-elf-reloc-condbr.s
@@ -4,7 +4,7 @@
b.eq somewhere
// OBJ: Relocations [
-// OBJ-NEXT: Section (2) .rela.text {
+// OBJ-NEXT: Section {{.*}} .rela.text {
// OBJ-NEXT: 0x0 R_AARCH64_CONDBR19 somewhere 0x0
// OBJ-NEXT: }
// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/arm64-fp-encoding-error.s b/test/MC/AArch64/arm64-fp-encoding-error.s
new file mode 100644
index 0000000..ad30127
--- /dev/null
+++ b/test/MC/AArch64/arm64-fp-encoding-error.s
@@ -0,0 +1,8 @@
+; RUN: not llvm-mc -triple arm64-apple-ios8.0 %s -o /dev/null 2>&1 | FileCheck %s
+
+ fmov s0, #-0.0
+; CHECK: error: expected compatible register or floating-point constant
+
+ fmov d0, #-0.0
+; CHECK: error: expected compatible register or floating-point constant
+
diff --git a/test/MC/AArch64/armv8.1a-lor.s b/test/MC/AArch64/armv8.1a-lor.s
new file mode 100644
index 0000000..309cb32
--- /dev/null
+++ b/test/MC/AArch64/armv8.1a-lor.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.1a < %s | FileCheck %s
+
+
+//------------------------------------------------------------------------------
+// Load acquire / store release
+//------------------------------------------------------------------------------
+ ldlarb w0,[x1]
+ ldlarh w0,[x1]
+ ldlar w0,[x1]
+ ldlar x0,[x1]
+// CHECK: ldlarb w0, [x1] // encoding: [0x20,0x7c,0xdf,0x08]
+// CHECK: ldlarh w0, [x1] // encoding: [0x20,0x7c,0xdf,0x48]
+// CHECK: ldlar w0, [x1] // encoding: [0x20,0x7c,0xdf,0x88]
+// CHECK: ldlar x0, [x1] // encoding: [0x20,0x7c,0xdf,0xc8]
+ stllrb w0,[x1]
+ stllrh w0,[x1]
+ stllr w0,[x1]
+ stllr x0,[x1]
+// CHECK: stllrb w0, [x1] // encoding: [0x20,0x7c,0x9f,0x08]
+// CHECK: stllrh w0, [x1] // encoding: [0x20,0x7c,0x9f,0x48]
+// CHECK: stllr w0, [x1] // encoding: [0x20,0x7c,0x9f,0x88]
+// CHECK: stllr x0, [x1] // encoding: [0x20,0x7c,0x9f,0xc8]
+
+ msr LORSA_EL1, x0
+ msr LOREA_EL1, x0
+ msr LORN_EL1, x0
+ msr LORC_EL1, x0
+ mrs x0, LORID_EL1
+// CHECK: msr LORSA_EL1, x0 // encoding: [0x00,0xa4,0x18,0xd5]
+// CHECK: msr LOREA_EL1, x0 // encoding: [0x20,0xa4,0x18,0xd5]
+// CHECK: msr LORN_EL1, x0 // encoding: [0x40,0xa4,0x18,0xd5]
+// CHECK: msr LORC_EL1, x0 // encoding: [0x60,0xa4,0x18,0xd5]
+// CHECK: mrs x0, LORID_EL1 // encoding: [0xe0,0xa4,0x38,0xd5]
diff --git a/test/MC/AArch64/armv8.1a-pan.s b/test/MC/AArch64/armv8.1a-pan.s
new file mode 100644
index 0000000..2068c81
--- /dev/null
+++ b/test/MC/AArch64/armv8.1a-pan.s
@@ -0,0 +1,30 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a -show-encoding < %s 2> %t | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERROR %s < %t
+
+ .text
+
+ msr pan, #0
+// CHECK: msr PAN, #0 // encoding: [0x9f,0x40,0x00,0xd5]
+ msr pan, #1
+// CHECK: msr PAN, #1 // encoding: [0x9f,0x41,0x00,0xd5]
+ msr pan, x5
+// CHECK: msr PAN, x5 // encoding: [0x65,0x42,0x18,0xd5]
+ mrs x13, pan
+// CHECK: mrs x13, PAN // encoding: [0x6d,0x42,0x38,0xd5]
+
+ msr pan, #-1
+ msr pan, #20
+ msr pan, w0
+ mrs w0, pan
+// CHECK-ERROR: error: immediate must be an integer in range [0, 15].
+// CHECK-ERROR: msr pan, #-1
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: immediate must be an integer in range [0, 15].
+// CHECK-ERROR: msr pan, #20
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: immediate must be an integer in range [0, 15].
+// CHECK-ERROR: msr pan, w0
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: mrs w0, pan
+// CHECK-ERROR: ^
diff --git a/test/MC/AArch64/armv8.1a-rdma.s b/test/MC/AArch64/armv8.1a-rdma.s
new file mode 100644
index 0000000..1de2a0f
--- /dev/null
+++ b/test/MC/AArch64/armv8.1a-rdma.s
@@ -0,0 +1,154 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a -show-encoding < %s 2> %t | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+ .text
+
+ //AdvSIMD RDMA vector
+ sqrdmlah v0.4h, v1.4h, v2.4h
+ sqrdmlsh v0.4h, v1.4h, v2.4h
+ sqrdmlah v0.2s, v1.2s, v2.2s
+ sqrdmlsh v0.2s, v1.2s, v2.2s
+ sqrdmlah v0.4s, v1.4s, v2.4s
+ sqrdmlsh v0.4s, v1.4s, v2.4s
+ sqrdmlah v0.8h, v1.8h, v2.8h
+ sqrdmlsh v0.8h, v1.8h, v2.8h
+// CHECK: sqrdmlah v0.4h, v1.4h, v2.4h // encoding: [0x20,0x84,0x42,0x2e]
+// CHECK: sqrdmlsh v0.4h, v1.4h, v2.4h // encoding: [0x20,0x8c,0x42,0x2e]
+// CHECK: sqrdmlah v0.2s, v1.2s, v2.2s // encoding: [0x20,0x84,0x82,0x2e]
+// CHECK: sqrdmlsh v0.2s, v1.2s, v2.2s // encoding: [0x20,0x8c,0x82,0x2e]
+// CHECK: sqrdmlah v0.4s, v1.4s, v2.4s // encoding: [0x20,0x84,0x82,0x6e]
+// CHECK: sqrdmlsh v0.4s, v1.4s, v2.4s // encoding: [0x20,0x8c,0x82,0x6e]
+// CHECK: sqrdmlah v0.8h, v1.8h, v2.8h // encoding: [0x20,0x84,0x42,0x6e]
+// CHECK: sqrdmlsh v0.8h, v1.8h, v2.8h // encoding: [0x20,0x8c,0x42,0x6e]
+
+ sqrdmlah v0.2h, v1.2h, v2.2h
+ sqrdmlsh v0.2h, v1.2h, v2.2h
+ sqrdmlah v0.8s, v1.8s, v2.8s
+ sqrdmlsh v0.8s, v1.8s, v2.8s
+ sqrdmlah v0.2s, v1.4h, v2.8h
+ sqrdmlsh v0.4s, v1.8h, v2.2s
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.2s, v1.4h, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh v0.4s, v1.8h, v2.2s
+// CHECK-ERROR: ^
+
+ //AdvSIMD RDMA scalar
+ sqrdmlah h0, h1, h2
+ sqrdmlsh h0, h1, h2
+ sqrdmlah s0, s1, s2
+ sqrdmlsh s0, s1, s2
+// CHECK: sqrdmlah h0, h1, h2 // encoding: [0x20,0x84,0x42,0x7e]
+// CHECK: sqrdmlsh h0, h1, h2 // encoding: [0x20,0x8c,0x42,0x7e]
+// CHECK: sqrdmlah s0, s1, s2 // encoding: [0x20,0x84,0x82,0x7e]
+// CHECK: sqrdmlsh s0, s1, s2 // encoding: [0x20,0x8c,0x82,0x7e]
+
+ //AdvSIMD RDMA vector by-element
+ sqrdmlah v0.4h, v1.4h, v2.h[3]
+ sqrdmlsh v0.4h, v1.4h, v2.h[3]
+ sqrdmlah v0.2s, v1.2s, v2.s[1]
+ sqrdmlsh v0.2s, v1.2s, v2.s[1]
+ sqrdmlah v0.8h, v1.8h, v2.h[3]
+ sqrdmlsh v0.8h, v1.8h, v2.h[3]
+ sqrdmlah v0.4s, v1.4s, v2.s[3]
+ sqrdmlsh v0.4s, v1.4s, v2.s[3]
+// CHECK: sqrdmlah v0.4h, v1.4h, v2.h[3] // encoding: [0x20,0xd0,0x72,0x2f]
+// CHECK: sqrdmlsh v0.4h, v1.4h, v2.h[3] // encoding: [0x20,0xf0,0x72,0x2f]
+// CHECK: sqrdmlah v0.2s, v1.2s, v2.s[1] // encoding: [0x20,0xd0,0xa2,0x2f]
+// CHECK: sqrdmlsh v0.2s, v1.2s, v2.s[1] // encoding: [0x20,0xf0,0xa2,0x2f]
+// CHECK: sqrdmlah v0.8h, v1.8h, v2.h[3] // encoding: [0x20,0xd0,0x72,0x6f]
+// CHECK: sqrdmlsh v0.8h, v1.8h, v2.h[3] // encoding: [0x20,0xf0,0x72,0x6f]
+// CHECK: sqrdmlah v0.4s, v1.4s, v2.s[3] // encoding: [0x20,0xd8,0xa2,0x6f]
+// CHECK: sqrdmlsh v0.4s, v1.4s, v2.s[3] // encoding: [0x20,0xf8,0xa2,0x6f]
+
+ sqrdmlah v0.4s, v1.2s, v2.s[1]
+ sqrdmlsh v0.2s, v1.2d, v2.s[1]
+ sqrdmlah v0.8h, v1.8h, v2.s[3]
+ sqrdmlsh v0.8h, v1.8h, v2.h[8]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.4s, v1.2s, v2.s[1]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh v0.2s, v1.2d, v2.s[1]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.8h, v1.8h, v2.s[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: vector lane must be an integer in range [0, 7].
+// CHECK-ERROR: sqrdmlsh v0.8h, v1.8h, v2.h[8]
+// CHECK-ERROR: ^
+
+ //AdvSIMD RDMA scalar by-element
+ sqrdmlah h0, h1, v2.h[3]
+ sqrdmlsh h0, h1, v2.h[3]
+ sqrdmlah s0, s1, v2.s[3]
+ sqrdmlsh s0, s1, v2.s[3]
+// CHECK: sqrdmlah h0, h1, v2.h[3] // encoding: [0x20,0xd0,0x72,0x7f]
+// CHECK: sqrdmlsh h0, h1, v2.h[3] // encoding: [0x20,0xf0,0x72,0x7f]
+// CHECK: sqrdmlah s0, s1, v2.s[3] // encoding: [0x20,0xd8,0xa2,0x7f]
+// CHECK: sqrdmlsh s0, s1, v2.s[3] // encoding: [0x20,0xf8,0xa2,0x7f]
+
+ sqrdmlah b0, h1, v2.h[3]
+ sqrdmlah s0, d1, v2.s[3]
+ sqrdmlsh h0, h1, v2.s[3]
+ sqrdmlsh s0, s1, v2.s[4]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah b0, h1, v2.h[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah s0, d1, v2.s[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh h0, h1, v2.s[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: vector lane must be an integer in range [0, 3].
+// CHECK-ERROR: sqrdmlsh s0, s1, v2.s[4]
+// CHECK-ERROR: ^
diff --git a/test/MC/AArch64/armv8.1a-vhe.s b/test/MC/AArch64/armv8.1a-vhe.s
new file mode 100644
index 0000000..2bee4e2
--- /dev/null
+++ b/test/MC/AArch64/armv8.1a-vhe.s
@@ -0,0 +1,61 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.1a < %s | FileCheck %s
+
+
+//------------------------------------------------------------------------------
+// Virtualization Host Extensions
+//------------------------------------------------------------------------------
+ msr TTBR1_EL2, x0
+ msr CONTEXTIDR_EL2, x0
+ msr CNTHV_TVAL_EL2, x0
+ msr CNTHV_CVAL_EL2, x0
+ msr CNTHV_CTL_EL2, x0
+ msr SCTLR_EL12, x0
+ msr CPACR_EL12, x0
+ msr TTBR0_EL12, x0
+ msr TTBR1_EL12, x0
+ msr TCR_EL12, x0
+ msr AFSR0_EL12, x0
+ msr AFSR1_EL12, x0
+ msr ESR_EL12, x0
+ msr FAR_EL12, x0
+ msr MAIR_EL12, x0
+ msr AMAIR_EL12, x0
+ msr VBAR_EL12, x0
+ msr CONTEXTIDR_EL12, x0
+ msr CNTKCTL_EL12, x0
+ msr CNTP_TVAL_EL02, x0
+ msr CNTP_CTL_EL02, x0
+ msr CNTP_CVAL_EL02, x0
+ msr CNTV_TVAL_EL02, x0
+ msr CNTV_CTL_EL02, x0
+ msr CNTV_CVAL_EL02, x0
+ msr SPSR_EL12, x0
+ msr ELR_EL12, x0
+
+// CHECK: msr TTBR1_EL2, x0 // encoding: [0x20,0x20,0x1c,0xd5]
+// CHECK: msr CONTEXTIDR_EL2, x0 // encoding: [0x20,0xd0,0x1c,0xd5]
+// CHECK: msr CNTHV_TVAL_EL2, x0 // encoding: [0x00,0xe3,0x1c,0xd5]
+// CHECK: msr CNTHV_CVAL_EL2, x0 // encoding: [0x40,0xe3,0x1c,0xd5]
+// CHECK: msr CNTHV_CTL_EL2, x0 // encoding: [0x20,0xe3,0x1c,0xd5]
+// CHECK: msr SCTLR_EL12, x0 // encoding: [0x00,0x10,0x1d,0xd5]
+// CHECK: msr CPACR_EL12, x0 // encoding: [0x40,0x10,0x1d,0xd5]
+// CHECK: msr TTBR0_EL12, x0 // encoding: [0x00,0x20,0x1d,0xd5]
+// CHECK: msr TTBR1_EL12, x0 // encoding: [0x20,0x20,0x1d,0xd5]
+// CHECK: msr TCR_EL12, x0 // encoding: [0x40,0x20,0x1d,0xd5]
+// CHECK: msr AFSR0_EL12, x0 // encoding: [0x00,0x51,0x1d,0xd5]
+// CHECK: msr AFSR1_EL12, x0 // encoding: [0x20,0x51,0x1d,0xd5]
+// CHECK: msr ESR_EL12, x0 // encoding: [0x00,0x52,0x1d,0xd5]
+// CHECK: msr FAR_EL12, x0 // encoding: [0x00,0x60,0x1d,0xd5]
+// CHECK: msr MAIR_EL12, x0 // encoding: [0x00,0xa2,0x1d,0xd5]
+// CHECK: msr AMAIR_EL12, x0 // encoding: [0x00,0xa3,0x1d,0xd5]
+// CHECK: msr VBAR_EL12, x0 // encoding: [0x00,0xc0,0x1d,0xd5]
+// CHECK: msr CONTEXTIDR_EL12, x0 // encoding: [0x20,0xd0,0x1d,0xd5]
+// CHECK: msr CNTKCTL_EL12, x0 // encoding: [0x00,0xe1,0x1d,0xd5]
+// CHECK: msr CNTP_TVAL_EL02, x0 // encoding: [0x00,0xe2,0x1d,0xd5]
+// CHECK: msr CNTP_CTL_EL02, x0 // encoding: [0x20,0xe2,0x1d,0xd5]
+// CHECK: msr CNTP_CVAL_EL02, x0 // encoding: [0x40,0xe2,0x1d,0xd5]
+// CHECK: msr CNTV_TVAL_EL02, x0 // encoding: [0x00,0xe3,0x1d,0xd5]
+// CHECK: msr CNTV_CTL_EL02, x0 // encoding: [0x20,0xe3,0x1d,0xd5]
+// CHECK: msr CNTV_CVAL_EL02, x0 // encoding: [0x40,0xe3,0x1d,0xd5]
+// CHECK: msr SPSR_EL12, x0 // encoding: [0x00,0x40,0x1d,0xd5]
+// CHECK: msr ELR_EL12, x0 // encoding: [0x20,0x40,0x1d,0xd5]
diff --git a/test/MC/AArch64/elf-extern.s b/test/MC/AArch64/elf-extern.s
index dfa3fb0..14c26c1 100644
--- a/test/MC/AArch64/elf-extern.s
+++ b/test/MC/AArch64/elf-extern.s
@@ -27,7 +27,7 @@ check_extern: // @check_extern
// CHECK: Relocations [
-// CHECK: Section (2) .rela.text {
+// CHECK: Section {{.*}} .rela.text {
// CHECK: 0x{{[0-9,A-F]+}} R_AARCH64_CALL26 memcpy
// CHECK: }
// CHECK: ]
diff --git a/test/MC/AArch64/elf-globaladdress.ll b/test/MC/AArch64/elf-globaladdress.ll
index 8e4ae4c..d8a0b5b 100644
--- a/test/MC/AArch64/elf-globaladdress.ll
+++ b/test/MC/AArch64/elf-globaladdress.ll
@@ -40,7 +40,7 @@ define void @address() {
; OBJ: }
; OBJ: Relocations [
-; OBJ: Section (2) .rela.text {
+; OBJ: Section {{.*}} .rela.text {
; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var8
; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST8_ABS_LO12_NC var8
; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var16
diff --git a/test/MC/AArch64/elf-reloc-addsubimm.s b/test/MC/AArch64/elf-reloc-addsubimm.s
index e37991b..58e9a6e 100644
--- a/test/MC/AArch64/elf-reloc-addsubimm.s
+++ b/test/MC/AArch64/elf-reloc-addsubimm.s
@@ -4,7 +4,7 @@
add x2, x3, #:lo12:some_label
// OBJ: Relocations [
-// OBJ-NEXT: Section (2) .rela.text {
+// OBJ-NEXT: Section {{.*}} .rela.text {
// OBJ-NEXT: 0x0 R_AARCH64_ADD_ABS_LO12_NC some_label 0x0
// OBJ-NEXT: }
// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-ldrlit.s b/test/MC/AArch64/elf-reloc-ldrlit.s
index d4c3a4e..017d66c 100644
--- a/test/MC/AArch64/elf-reloc-ldrlit.s
+++ b/test/MC/AArch64/elf-reloc-ldrlit.s
@@ -7,7 +7,7 @@
prfm pldl3keep, some_label
// OBJ: Relocations [
-// OBJ-NEXT: Section (2) .rela.text {
+// OBJ-NEXT: Section {{.*}} .rela.text {
// OBJ-NEXT: 0x0 R_AARCH64_LD_PREL_LO19 some_label 0x0
// OBJ-NEXT: 0x4 R_AARCH64_LD_PREL_LO19 some_label 0x0
// OBJ-NEXT: 0x8 R_AARCH64_LD_PREL_LO19 some_label 0x0
diff --git a/test/MC/AArch64/elf-reloc-ldstunsimm.s b/test/MC/AArch64/elf-reloc-ldstunsimm.s
index 371e7e5..e68937c 100644
--- a/test/MC/AArch64/elf-reloc-ldstunsimm.s
+++ b/test/MC/AArch64/elf-reloc-ldstunsimm.s
@@ -8,7 +8,7 @@
str q0, [sp, #:lo12:some_label]
// OBJ: Relocations [
-// OBJ-NEXT: Section (2) .rela.text {
+// OBJ-NEXT: Section {{.*}} .rela.text {
// OBJ-NEXT: 0x0 R_AARCH64_LDST8_ABS_LO12_NC some_label 0x0
// OBJ-NEXT: 0x4 R_AARCH64_LDST16_ABS_LO12_NC some_label 0x0
// OBJ-NEXT: 0x8 R_AARCH64_LDST32_ABS_LO12_NC some_label 0x0
diff --git a/test/MC/AArch64/elf-reloc-movw.s b/test/MC/AArch64/elf-reloc-movw.s
index 3331595..fda160f 100644
--- a/test/MC/AArch64/elf-reloc-movw.s
+++ b/test/MC/AArch64/elf-reloc-movw.s
@@ -23,7 +23,7 @@
movn x19, #:abs_g2_s:some_label
// OBJ: Relocations [
-// OBJ-NEXT: Section (2) .rela.text {
+// OBJ-NEXT: Section {{.*}} .rela.text {
// OBJ-NEXT: 0x0 R_AARCH64_MOVW_UABS_G0 some_label 0x0
// OBJ-NEXT: 0x4 R_AARCH64_MOVW_UABS_G0_NC some_label 0x0
// OBJ-NEXT: 0x8 R_AARCH64_MOVW_UABS_G1 some_label 0x0
diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing.s b/test/MC/AArch64/elf-reloc-pcreladdressing.s
index 093891d..30acb6d 100644
--- a/test/MC/AArch64/elf-reloc-pcreladdressing.s
+++ b/test/MC/AArch64/elf-reloc-pcreladdressing.s
@@ -8,7 +8,7 @@
ldr x0, [x5, #:got_lo12:some_label]
// OBJ: Relocations [
-// OBJ-NEXT: Section (2) .rela.text {
+// OBJ-NEXT: Section {{.*}} .rela.text {
// OBJ-NEXT: 0x0 R_AARCH64_ADR_PREL_LO21 some_label 0x0
// OBJ-NEXT: 0x4 R_AARCH64_ADR_PREL_PG_HI21 some_label 0x0
// OBJ-NEXT: 0x8 R_AARCH64_ADR_GOT_PAGE some_label 0x0
diff --git a/test/MC/AArch64/elf-reloc-tstb.s b/test/MC/AArch64/elf-reloc-tstb.s
index 25c9816..e6828e6 100644
--- a/test/MC/AArch64/elf-reloc-tstb.s
+++ b/test/MC/AArch64/elf-reloc-tstb.s
@@ -5,7 +5,7 @@
tbnz w3, #15, somewhere
// OBJ: Relocations [
-// OBJ-NEXT: Section (2) .rela.text {
+// OBJ-NEXT: Section {{.*}} .rela.text {
// OBJ-NEXT: 0x0 R_AARCH64_TSTBR14 somewhere 0x0
// OBJ-NEXT: 0x4 R_AARCH64_TSTBR14 somewhere 0x0
// OBJ-NEXT: }
diff --git a/test/MC/AArch64/elf-reloc-uncondbrimm.s b/test/MC/AArch64/elf-reloc-uncondbrimm.s
index 9ac66bd..ff852be 100644
--- a/test/MC/AArch64/elf-reloc-uncondbrimm.s
+++ b/test/MC/AArch64/elf-reloc-uncondbrimm.s
@@ -5,7 +5,7 @@
bl somewhere
// OBJ: Relocations [
-// OBJ-NEXT: Section (2) .rela.text {
+// OBJ-NEXT: Section {{.*}} .rela.text {
// OBJ-NEXT: 0x0 R_AARCH64_JUMP26 somewhere 0x0
// OBJ-NEXT: 0x4 R_AARCH64_CALL26 somewhere 0x0
// OBJ-NEXT: }
diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s
index 9e94a52..bac4f20 100644
--- a/test/MC/AArch64/tls-relocs.s
+++ b/test/MC/AArch64/tls-relocs.s
@@ -18,7 +18,7 @@
// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw
// CHECK-ELF: Relocations [
-// CHECK-ELF-NEXT: Section (2) .rela.text {
+// CHECK-ELF-NEXT: Section {{.*}} .rela.text {
// CHECK-ELF-NEXT: 0x0 R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM:[^ ]+]]
// CHECK-ELF-NEXT: 0x4 R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
// CHECK-ELF-NEXT: 0x8 R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
diff --git a/test/MC/ARM/2010-11-30-reloc-movt.s b/test/MC/ARM/2010-11-30-reloc-movt.s
index 9de88f0..dc6960b 100644
--- a/test/MC/ARM/2010-11-30-reloc-movt.s
+++ b/test/MC/ARM/2010-11-30-reloc-movt.s
@@ -34,6 +34,7 @@ barf: @ @barf
// CHECK-NEXT: 0000: 00482DE9 000000E3 000040E3 FEFFFFEB
// CHECK-NEXT: 0010: 0088BDE8
// CHECK-NEXT: )
+// CHECK: Name: .rel.text
// CHECK: Relocations [
// CHECK-NEXT: 0x4 R_ARM_MOVW_ABS_NC a
// CHECK-NEXT: 0x8 R_ARM_MOVT_ABS
diff --git a/test/MC/ARM/arm-elf-symver.s b/test/MC/ARM/arm-elf-symver.s
index 5fb1f6a..26d7655 100644
--- a/test/MC/ARM/arm-elf-symver.s
+++ b/test/MC/ARM/arm-elf-symver.s
@@ -23,7 +23,7 @@ defined3:
global1:
@ CHECK: Relocations [
-@ CHECK-NEXT: Section (2) .rel.text {
+@ CHECK-NEXT: Section {{.*}} .rel.text {
@ CHECK-NEXT: 0x0 R_ARM_ABS32 .text 0x0
@ CHECK-NEXT: 0x4 R_ARM_ABS32 bar2@zed 0x0
@ CHECK-NEXT: 0x8 R_ARM_ABS32 .text 0x0
@@ -93,7 +93,7 @@ global1:
@ CHECK-NEXT: Binding: Local (0x0)
@ CHECK-NEXT: Type: Section (0x3)
@ CHECK-NEXT: Other: 0
-@ CHECK-NEXT: Section: .data (0x3)
+@ CHECK-NEXT: Section: .data
@ CHECK-NEXT: }
@ CHECK-NEXT: Symbol {
@ CHECK-NEXT: Name: .bss (0)
@@ -102,7 +102,7 @@ global1:
@ CHECK-NEXT: Binding: Local (0x0)
@ CHECK-NEXT: Type: Section (0x3)
@ CHECK-NEXT: Other: 0
-@ CHECK-NEXT: Section: .bss (0x4)
+@ CHECK-NEXT: Section: .bss
@ CHECK-NEXT: }
@ CHECK-NEXT: Symbol {
@ CHECK-NEXT: Name: g1@@zed
diff --git a/test/MC/ARM/basic-arm-instructions-v8.1a.s b/test/MC/ARM/basic-arm-instructions-v8.1a.s
index f46057b6..005f27b 100644
--- a/test/MC/ARM/basic-arm-instructions-v8.1a.s
+++ b/test/MC/ARM/basic-arm-instructions-v8.1a.s
@@ -43,28 +43,28 @@
vqrdmlah.s16 d0, d1, d2
//CHECK-V81aARM: vqrdmlah.s16 d0, d1, d2 @ encoding: [0x12,0x0b,0x11,0xf3]
//CHECK-V81aTHUMB: vqrdmlah.s16 d0, d1, d2 @ encoding: [0x11,0xff,0x12,0x0b]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlah.s16 d0, d1, d2
//CHECK-V8: ^
vqrdmlah.s32 d0, d1, d2
//CHECK-V81aARM: vqrdmlah.s32 d0, d1, d2 @ encoding: [0x12,0x0b,0x21,0xf3]
//CHECK-V81aTHUMB: vqrdmlah.s32 d0, d1, d2 @ encoding: [0x21,0xff,0x12,0x0b]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlah.s32 d0, d1, d2
//CHECK-V8: ^
vqrdmlah.s16 q0, q1, q2
//CHECK-V81aARM: vqrdmlah.s16 q0, q1, q2 @ encoding: [0x54,0x0b,0x12,0xf3]
//CHECK-V81aTHUMB: vqrdmlah.s16 q0, q1, q2 @ encoding: [0x12,0xff,0x54,0x0b]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlah.s16 q0, q1, q2
//CHECK-V8: ^
vqrdmlah.s32 q2, q3, q0
//CHECK-V81aARM: vqrdmlah.s32 q2, q3, q0 @ encoding: [0x50,0x4b,0x26,0xf3]
//CHECK-V81aTHUMB: vqrdmlah.s32 q2, q3, q0 @ encoding: [0x26,0xff,0x50,0x4b]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlah.s32 q2, q3, q0
//CHECK-V8: ^
@@ -72,28 +72,28 @@
vqrdmlsh.s16 d7, d6, d5
//CHECK-V81aARM: vqrdmlsh.s16 d7, d6, d5 @ encoding: [0x15,0x7c,0x16,0xf3]
//CHECK-V81aTHUMB: vqrdmlsh.s16 d7, d6, d5 @ encoding: [0x16,0xff,0x15,0x7c]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlsh.s16 d7, d6, d5
//CHECK-V8: ^
vqrdmlsh.s32 d0, d1, d2
//CHECK-V81aARM: vqrdmlsh.s32 d0, d1, d2 @ encoding: [0x12,0x0c,0x21,0xf3]
//CHECK-V81aTHUMB: vqrdmlsh.s32 d0, d1, d2 @ encoding: [0x21,0xff,0x12,0x0c]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlsh.s32 d0, d1, d2
//CHECK-V8: ^
vqrdmlsh.s16 q0, q1, q2
//CHECK-V81aARM: vqrdmlsh.s16 q0, q1, q2 @ encoding: [0x54,0x0c,0x12,0xf3]
//CHECK-V81aTHUMB: vqrdmlsh.s16 q0, q1, q2 @ encoding: [0x12,0xff,0x54,0x0c]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlsh.s16 q0, q1, q2
//CHECK-V8: ^
vqrdmlsh.s32 q3, q4, q5
//CHECK-V81aARM: vqrdmlsh.s32 q3, q4, q5 @ encoding: [0x5a,0x6c,0x28,0xf3]
//CHECK-V81aTHUMB: vqrdmlsh.s32 q3, q4, q5 @ encoding: [0x28,0xff,0x5a,0x6c]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlsh.s32 q3, q4, q5
//CHECK-V8: ^
@@ -119,28 +119,28 @@
vqrdmlah.s16 d0, d1, d2[0]
//CHECK-V81aARM: vqrdmlah.s16 d0, d1, d2[0] @ encoding: [0x42,0x0e,0x91,0xf2]
//CHECK-V81aTHUMB: vqrdmlah.s16 d0, d1, d2[0] @ encoding: [0x91,0xef,0x42,0x0e]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlah.s16 d0, d1, d2[0]
//CHECK-V8: ^
vqrdmlah.s32 d0, d1, d2[0]
//CHECK-V81aARM: vqrdmlah.s32 d0, d1, d2[0] @ encoding: [0x42,0x0e,0xa1,0xf2]
//CHECK-V81aTHUMB: vqrdmlah.s32 d0, d1, d2[0] @ encoding: [0xa1,0xef,0x42,0x0e]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlah.s32 d0, d1, d2[0]
//CHECK-V8: ^
vqrdmlah.s16 q0, q1, d2[0]
//CHECK-V81aARM: vqrdmlah.s16 q0, q1, d2[0] @ encoding: [0x42,0x0e,0x92,0xf3]
//CHECK-V81aTHUMB: vqrdmlah.s16 q0, q1, d2[0] @ encoding: [0x92,0xff,0x42,0x0e]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlah.s16 q0, q1, d2[0]
//CHECK-V8: ^
vqrdmlah.s32 q0, q1, d2[0]
//CHECK-V81aARM: vqrdmlah.s32 q0, q1, d2[0] @ encoding: [0x42,0x0e,0xa2,0xf3]
//CHECK-V81aTHUMB: vqrdmlah.s32 q0, q1, d2[0] @ encoding: [0xa2,0xff,0x42,0x0e]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlah.s32 q0, q1, d2[0]
//CHECK-V8: ^
@@ -148,27 +148,59 @@
vqrdmlsh.s16 d0, d1, d2[0]
//CHECK-V81aARM: vqrdmlsh.s16 d0, d1, d2[0] @ encoding: [0x42,0x0f,0x91,0xf2]
//CHECK-V81aTHUMB: vqrdmlsh.s16 d0, d1, d2[0] @ encoding: [0x91,0xef,0x42,0x0f]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlsh.s16 d0, d1, d2[0]
//CHECK-V8: ^
vqrdmlsh.s32 d0, d1, d2[0]
//CHECK-V81aARM: vqrdmlsh.s32 d0, d1, d2[0] @ encoding: [0x42,0x0f,0xa1,0xf2]
//CHECK-V81aTHUMB: vqrdmlsh.s32 d0, d1, d2[0] @ encoding: [0xa1,0xef,0x42,0x0f]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlsh.s32 d0, d1, d2[0]
//CHECK-V8: ^
vqrdmlsh.s16 q0, q1, d2[0]
//CHECK-V81aARM: vqrdmlsh.s16 q0, q1, d2[0] @ encoding: [0x42,0x0f,0x92,0xf3]
//CHECK-V81aTHUMB: vqrdmlsh.s16 q0, q1, d2[0] @ encoding: [0x92,0xff,0x42,0x0f]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlsh.s16 q0, q1, d2[0]
//CHECK-V8: ^
vqrdmlsh.s32 q0, q1, d2[0]
//CHECK-V81aARM: vqrdmlsh.s32 q0, q1, d2[0] @ encoding: [0x42,0x0f,0xa2,0xf3]
//CHECK-V81aTHUMB: vqrdmlsh.s32 q0, q1, d2[0] @ encoding: [0xa2,0xff,0x42,0x0f]
-//CHECK-V8: error: instruction requires: v8.1a
+//CHECK-V8: error: instruction requires: armv8.1a
//CHECK-V8: vqrdmlsh.s32 q0, q1, d2[0]
//CHECK-V8: ^
+
+ setpan #0
+//CHECK-V81aTHUMB: setpan #0 @ encoding: [0x10,0xb6]
+//CHECK-V81aARM: setpan #0 @ encoding: [0x00,0x00,0x10,0xf1]
+//CHECK-V8: error: instruction requires: armv8.1a
+//CHECK-V8: setpan #0
+//CHECK-V8: ^
+
+ setpan #1
+//CHECK-V81aTHUMB: setpan #1 @ encoding: [0x18,0xb6]
+//CHECK-V81aARM: setpan #1 @ encoding: [0x00,0x02,0x10,0xf1]
+//CHECK-V8: error: instruction requires: armv8.1a
+//CHECK-V8: setpan #1
+//CHECK-V8: ^
+ setpan
+ setpan #-1
+ setpan #2
+//CHECK-ERROR: error: too few operands for instruction
+//CHECK-ERROR: setpan
+//CHECK-ERROR: ^
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: setpan #-1
+//CHECK-ERROR: ^
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR: setpan #2
+//CHECK-ERROR: ^
+
+ it eq
+ setpaneq #0
+//CHECK-THUMB-ERROR: error: instruction 'setpan' is not predicable, but condition code specified
+//CHECK-THUMB-ERROR: setpaneq #0
+//CHECK-THUMB-ERROR: ^
diff --git a/test/MC/ARM/eh-compact-pr0.s b/test/MC/ARM/eh-compact-pr0.s
index 1d825bf..9c0581a 100644
--- a/test/MC/ARM/eh-compact-pr0.s
+++ b/test/MC/ARM/eh-compact-pr0.s
@@ -1,5 +1,7 @@
@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
-@ RUN: | llvm-readobj -s -sd -sr | FileCheck %s
+@ RUN: | llvm-readobj -s -sd -sr > %t
+@ RUN: FileCheck %s < %t
+@ RUN: FileCheck --check-prefix=RELOC %s < %t
@ Check the compact pr0 model
@@ -63,10 +65,13 @@ func2:
@ another relocation entry for __aeabi_unwind_cpp_pr0, so that the linker
@ will keep __aeabi_unwind_cpp_pr0.
@-------------------------------------------------------------------------------
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .TEST1 0x0
-@ CHECK: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
-@ CHECK: ]
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.TEST1
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .TEST1 0x0
+@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ RELOC: ]
+@ RELOC: }
@-------------------------------------------------------------------------------
@@ -92,13 +97,15 @@ func2:
@ CHECK: 0000: 00000000 B0808480 |........|
@ CHECK: )
@ CHECK: }
-@ CHECK: ]
@-------------------------------------------------------------------------------
@ The first word should be relocated to .TEST2 section. Besides, there is
@ another relocation entry for __aeabi_unwind_cpp_pr0, so that the linker
@ will keep __aeabi_unwind_cpp_pr0.
@-------------------------------------------------------------------------------
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .TEST2 0x0
-@ CHECK: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
-@ CHECK: ]
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.TEST2
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .TEST2 0x0
+@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ RELOC: ]
+@ RELOC: }
diff --git a/test/MC/ARM/eh-directive-handlerdata.s b/test/MC/ARM/eh-directive-handlerdata.s
index 793d357..980a5f0 100644
--- a/test/MC/ARM/eh-directive-handlerdata.s
+++ b/test/MC/ARM/eh-directive-handlerdata.s
@@ -1,5 +1,7 @@
@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
-@ RUN: | llvm-readobj -s -sd -sr | FileCheck %s
+@ RUN: | llvm-readobj -s -sd -sr > %t
+@ RUN: FileCheck %s < %t
+@ RUN: FileCheck --check-prefix=RELOC %s < %t
@ Check the .handlerdata directive (without .personality directive)
@@ -43,11 +45,14 @@ func1:
@ We should see a relocation entry to __aeabi_unwind_cpp_pr0, so that the
@ linker can keep __aeabi_unwind_cpp_pr0.
@-------------------------------------------------------------------------------
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .TEST1 0x0
-@ CHECK: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
-@ CHECK: 0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
-@ CHECK: ]
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.TEST1
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .TEST1 0x0
+@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
+@ RELOC: ]
+@ RELOC: }
@@ -100,8 +105,11 @@ func2:
@ We should see a relocation entry to __aeabi_unwind_cpp_pr0, so that the
@ linker can keep __aeabi_unwind_cpp_pr0.
@-------------------------------------------------------------------------------
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .TEST2 0x0
-@ CHECK: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr1 0x0
-@ CHECK: 0x4 R_ARM_PREL31 .ARM.extab.TEST2 0x0
-@ CHECK: ]
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.TEST2
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .TEST2 0x0
+@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr1 0x0
+@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.TEST2 0x0
+@ RELOC: ]
+@ RELOC: }
diff --git a/test/MC/ARM/eh-directive-personality.s b/test/MC/ARM/eh-directive-personality.s
index f493722..84e62bd 100644
--- a/test/MC/ARM/eh-directive-personality.s
+++ b/test/MC/ARM/eh-directive-personality.s
@@ -1,5 +1,7 @@
@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
-@ RUN: | llvm-readobj -s -sd -sr | FileCheck %s
+@ RUN: | llvm-readobj -s -sd -sr > %t
+@ RUN: FileCheck %s < %t
+@ RUN: FileCheck --check-prefix=RELOC %s < %t
@ Check the .personality directive.
@@ -32,19 +34,28 @@ func1:
@ CHECK: 0000: 00000000 B0B0B000 |........|
@ CHECK: )
@ CHECK: }
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
-@ CHECK: ]
+
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.extab.TEST1
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
+@ RELOC: ]
+@ RELOC: }
+
@ CHECK: Section {
@ CHECK: Name: .ARM.exidx.TEST1
@ CHECK: SectionData (
@ CHECK: 0000: 00000000 00000000 |........|
@ CHECK: )
@ CHECK: }
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .TEST1 0x0
-@ CHECK: 0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
-@ CHECK: ]
+
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.TEST1
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .TEST1 0x0
+@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
+@ RELOC: ]
+@ RELOC: }
@-------------------------------------------------------------------------------
@@ -74,16 +85,25 @@ func2:
@ CHECK: 0000: 00000000 B0B0B000 |........|
@ CHECK: )
@ CHECK: }
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
-@ CHECK: ]
+
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.extab.TEST2
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
+@ RELOC: ]
+@ RELOC: }
+
@ CHECK: Section {
@ CHECK: Name: .ARM.exidx.TEST2
@ CHECK: SectionData (
@ CHECK: 0000: 00000000 00000000 |........|
@ CHECK: )
@ CHECK: }
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .TEST2 0x0
-@ CHECK: 0x4 R_ARM_PREL31 .ARM.extab.TEST2 0x0
-@ CHECK: ]
+
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.TEST2
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .TEST2 0x0
+@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.TEST2 0x0
+@ RELOC: ]
+@ RELOC: }
diff --git a/test/MC/ARM/eh-directive-personalityindex.s b/test/MC/ARM/eh-directive-personalityindex.s
index 5517227..6db9425 100644
--- a/test/MC/ARM/eh-directive-personalityindex.s
+++ b/test/MC/ARM/eh-directive-personalityindex.s
@@ -1,5 +1,7 @@
@ RUN: llvm-mc -triple armv7-linux-eabi -filetype obj -o - %s \
-@ RUN: | llvm-readobj -s -sd -sr | FileCheck %s
+@ RUN: | llvm-readobj -s -sd -sr > %t
+@ RUN: FileCheck %s < %t
+@ RUN: FileCheck --check-prefix=RELOC %s < %t
.syntax unified
.thumb
@@ -23,13 +25,13 @@ pr0:
@ CHECK: )
@ CHECK: }
-@ CHECK: Section {
-@ CHECK: Name: .rel.ARM.exidx.pr0
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .pr0 0x0
-@ CHECK: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
-@ CHECK: ]
-@ CHECK: }
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.pr0
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .pr0 0x0
+@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ RELOC: ]
+@ RELOC: }
.section .pr0.nontrivial
@@ -52,13 +54,13 @@ pr0_nontrivial:
@ CHECK: )
@ CHECK: }
-@ CHECK: Section {
-@ CHECK: Name: .rel.ARM.exidx.pr0.nontrivial
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .pr0.nontrivial 0x0
-@ CHECK: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
-@ CHECK: ]
-@ CHECK: }
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.pr0.nontrivial
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .pr0.nontrivial 0x0
+@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ RELOC: ]
+@ RELOC: }
.section .pr1
@@ -85,14 +87,14 @@ pr1:
@ CHECK: )
@ CHECK: }
-@ CHECK: Section {
-@ CHECK: Name: .rel.ARM.exidx.pr1
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .pr1 0x0
-@ CHECK: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr1 0x0
-@ CHECK: 0x4 R_ARM_PREL31 .ARM.extab.pr1 0x0
-@ CHECK: ]
-@ CHECK: }
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.pr1
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .pr1 0x0
+@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr1 0x0
+@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.pr1 0x0
+@ RELOC: ]
+@ RELOC: }
.section .pr1.nontrivial
@@ -122,14 +124,14 @@ pr1_nontrivial:
@ CHECK: )
@ CHECK: }
-@ CHECK: Section {
-@ CHECK: Name: .rel.ARM.exidx.pr1.nontrivial
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .pr1.nontrivial 0x0
-@ CHECK: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr1 0x0
-@ CHECK: 0x4 R_ARM_PREL31 .ARM.extab.pr1.nontrivial 0x0
-@ CHECK: ]
-@ CHECK: }
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.pr1.nontrivial
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .pr1.nontrivial 0x0
+@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr1 0x0
+@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.pr1.nontrivial 0x0
+@ RELOC: ]
+@ RELOC: }
.section .pr2
@@ -156,14 +158,14 @@ pr2:
@ CHECK: )
@ CHECK: }
-@ CHECK: Section {
-@ CHECK: Name: .rel.ARM.exidx.pr2
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .pr2 0x0
-@ CHECK: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr2 0x0
-@ CHECK: 0x4 R_ARM_PREL31 .ARM.extab.pr2 0x0
-@ CHECK: ]
-@ CHECK: }
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.pr2
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .pr2 0x0
+@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr2 0x0
+@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.pr2 0x0
+@ RELOC: ]
+@ RELOC: }
.section .pr2.nontrivial
.type pr2_nontrivial,%function
@@ -191,12 +193,11 @@ pr2_nontrivial:
@ CHECK: )
@ CHECK: }
-@ CHECK: Section {
-@ CHECK: Name: .rel.ARM.exidx.pr2.nontrivial
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .pr2.nontrivial 0x0
-@ CHECK: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr2 0x0
-@ CHECK: 0x4 R_ARM_PREL31 .ARM.extab.pr2.nontrivial 0x0
-@ CHECK: ]
-@ CHECK: }
-
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.pr2.nontrivial
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .pr2.nontrivial 0x0
+@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr2 0x0
+@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.pr2.nontrivial 0x0
+@ RELOC: ]
+@ RELOC: }
diff --git a/test/MC/ARM/eh-directive-section-multiple-func.s b/test/MC/ARM/eh-directive-section-multiple-func.s
index 9f632b8..e7198a4 100644
--- a/test/MC/ARM/eh-directive-section-multiple-func.s
+++ b/test/MC/ARM/eh-directive-section-multiple-func.s
@@ -1,5 +1,7 @@
@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
-@ RUN: | llvm-readobj -s -sd -sr -t | FileCheck %s
+@ RUN: | llvm-readobj -s -sd -sr -t > %t
+@ RUN: FileCheck %s < %t
+@ RUN: FileCheck --check-prefix=RELOC %s < %t
@ Check whether the section is switched back properly.
@@ -69,10 +71,14 @@ func2:
@ CHECK: 0000: 00000000 B0B0B000 00000000 B0B0B000 |................|
@ CHECK: )
@ CHECK: }
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
-@ CHECK: 0x8 R_ARM_PREL31 __gxx_personality_v0 0x0
-@ CHECK: ]
+
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.extab.TEST1
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
+@ RELOC: 0x8 R_ARM_PREL31 __gxx_personality_v0 0x0
+@ RELOC: ]
+@ RELOC: }
@-------------------------------------------------------------------------------
@@ -89,18 +95,21 @@ func2:
@ CHECK: 0000: 00000000 00000000 04000000 08000000 |................|
@ CHECK: )
@ CHECK: }
-@ CHECK: ]
@-------------------------------------------------------------------------------
@ The first word of each entry should be relocated to .TEST1 section.
@ The second word of each entry should be relocated to
@ .ARM.extab.TEST1 section.
@-------------------------------------------------------------------------------
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .TEST1 0x0
-@ CHECK: 0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
-@ CHECK: 0x8 R_ARM_PREL31 .TEST1 0x0
-@ CHECK: 0xC R_ARM_PREL31 .ARM.extab.TEST1 0x0
-@ CHECK: ]
+
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.TEST1
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .TEST1 0x0
+@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
+@ RELOC: 0x8 R_ARM_PREL31 .TEST1 0x0
+@ RELOC: 0xC R_ARM_PREL31 .ARM.extab.TEST1 0x0
+@ RELOC: ]
+@ RELOC: }
@-------------------------------------------------------------------------------
diff --git a/test/MC/ARM/eh-directive-section.s b/test/MC/ARM/eh-directive-section.s
index 7c1f32e..671d106 100644
--- a/test/MC/ARM/eh-directive-section.s
+++ b/test/MC/ARM/eh-directive-section.s
@@ -1,5 +1,7 @@
@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
-@ RUN: | llvm-readobj -s -sd -sr -t | FileCheck %s
+@ RUN: | llvm-readobj -s -sd -sr -t > %t
+@ RUN: FileCheck %s < %t
+@ RUN: FileCheck --check-prefix=RELOC %s < %t
@ Check the combination of .section, .fnstart, and .fnend directives.
@@ -64,9 +66,13 @@ func2:
@ CHECK: 0000: 00000000 B0B0B000 |........|
@ CHECK: )
@ CHECK: }
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
-@ CHECK: ]
+
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.extab.TEST1
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
+@ RELOC: ]
+@ RELOC: }
@-------------------------------------------------------------------------------
@@ -89,10 +95,14 @@ func2:
@ CHECK: 0000: 00000000 00000000 |........|
@ CHECK: )
@ CHECK: }
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .TEST1 0x0
-@ CHECK: 0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
-@ CHECK: ]
+
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidx.TEST1
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 .TEST1 0x0
+@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
+@ RELOC: ]
+@ RELOC: }
@-------------------------------------------------------------------------------
@@ -115,9 +125,13 @@ func2:
@ CHECK: 0000: 00000000 B0B0B000 |........|
@ CHECK: )
@ CHECK: }
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
-@ CHECK: ]
+
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.extabTEST2
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
+@ RELOC: ]
+@ RELOC: }
@-------------------------------------------------------------------------------
@@ -140,11 +154,14 @@ func2:
@ CHECK: 0000: 00000000 00000000 |........|
@ CHECK: )
@ CHECK: }
-@ CHECK: ]
-@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 TEST2 0x0
-@ CHECK: 0x4 R_ARM_PREL31 .ARM.extabTEST2 0x0
-@ CHECK: ]
+
+@ RELOC: Section {
+@ RELOC: Name: .rel.ARM.exidxTEST2
+@ RELOC: Relocations [
+@ RELOC: 0x0 R_ARM_PREL31 TEST2 0x0
+@ RELOC: 0x4 R_ARM_PREL31 .ARM.extabTEST2 0x0
+@ RELOC: ]
+@ RELOC: }
diff --git a/test/MC/ARM/eh-link.s b/test/MC/ARM/eh-link.s
new file mode 100644
index 0000000..0c44c0e
--- /dev/null
+++ b/test/MC/ARM/eh-link.s
@@ -0,0 +1,90 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN: | llvm-readobj -s | FileCheck %s
+
+@ Test that the ARM_EXIDX sections point (Link) to the corresponding text
+@ sections.
+
+@ FIXME: The section numbers are not important. If llvm-readobj printed the
+@ name first we could use a FileCheck variable.
+
+@ CHECK: Section {
+@ CHECK: Index: 6
+@ CHECK-NEXT: Name: .text
+@ CHECK-NEXT: Type: SHT_PROGBITS
+@ CHECK-NEXT: Flags [
+@ CHECK-NEXT: SHF_ALLOC
+@ CHECK-NEXT: SHF_EXECINSTR
+@ CHECK-NEXT: SHF_GROUP
+@ CHECK-NEXT: ]
+@ CHECK-NEXT: Address: 0x0
+@ CHECK-NEXT: Offset: 0x54
+@ CHECK-NEXT: Size: 4
+@ CHECK-NEXT: Link: 0
+@ CHECK-NEXT: Info: 0
+@ CHECK-NEXT: AddressAlignment: 1
+@ CHECK-NEXT: EntrySize: 0
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Section {
+@ CHECK-NEXT: Index: 7
+@ CHECK-NEXT: Name: .ARM.exidx
+@ CHECK-NEXT: Type: SHT_ARM_EXIDX
+@ CHECK-NEXT: Flags [
+@ CHECK-NEXT: SHF_ALLOC
+@ CHECK-NEXT: SHF_GROUP
+@ CHECK-NEXT: SHF_LINK_ORDER
+@ CHECK-NEXT: ]
+@ CHECK-NEXT: Address: 0x0
+@ CHECK-NEXT: Offset: 0x58
+@ CHECK-NEXT: Size: 8
+@ CHECK-NEXT: Link: 6
+@ CHECK-NEXT: Info: 0
+@ CHECK-NEXT: AddressAlignment: 4
+@ CHECK-NEXT: EntrySize: 0
+@ CHECK-NEXT: }
+
+@ CHECK: Section {
+@ CHECK: Index: 9
+@ CHECK-NEXT: Name: .text
+@ CHECK-NEXT: Type: SHT_PROGBITS
+@ CHECK-NEXT: Flags [
+@ CHECK-NEXT: SHF_ALLOC
+@ CHECK-NEXT: SHF_EXECINSTR
+@ CHECK-NEXT: SHF_GROUP
+@ CHECK-NEXT: ]
+@ CHECK-NEXT: Address: 0x0
+@ CHECK-NEXT: Offset:
+@ CHECK-NEXT: Size: 4
+@ CHECK-NEXT: Link: 0
+@ CHECK-NEXT: Info: 0
+@ CHECK-NEXT: AddressAlignment: 1
+@ CHECK-NEXT: EntrySize: 0
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Section {
+@ CHECK-NEXT: Index: 10
+@ CHECK-NEXT: Name: .ARM.exidx
+@ CHECK-NEXT: Type: SHT_ARM_EXIDX
+@ CHECK-NEXT: Flags [
+@ CHECK-NEXT: SHF_ALLOC
+@ CHECK-NEXT: SHF_GROUP
+@ CHECK-NEXT: SHF_LINK_ORDER
+@ CHECK-NEXT: ]
+@ CHECK-NEXT: Address: 0x0
+@ CHECK-NEXT: Offset:
+@ CHECK-NEXT: Size: 8
+@ CHECK-NEXT: Link: 9
+@ CHECK-NEXT: Info: 0
+@ CHECK-NEXT: AddressAlignment: 4
+@ CHECK-NEXT: EntrySize: 0
+@ CHECK-NEXT: }
+
+ .section .text,"axG",%progbits,f,comdat
+f:
+ .fnstart
+ mov pc, lr
+ .fnend
+
+ .section .text,"axG",%progbits,g,comdat
+g:
+ .fnstart
+ mov pc, lr
+ .fnend
diff --git a/test/MC/ARM/elf-movt.s b/test/MC/ARM/elf-movt.s
index 0080db4..1ff5da5 100644
--- a/test/MC/ARM/elf-movt.s
+++ b/test/MC/ARM/elf-movt.s
@@ -35,14 +35,14 @@ barf: @ @barf
@ OBJ-NEXT: 0000: F00F0FE3 F40F4FE3
@ OBJ-NEXT: )
@ OBJ-NEXT: }
-@ OBJ-NEXT: Section {
-@ OBJ-NEXT: Index: 2
-@ OBJ-NEXT: Name: .rel.text (1)
+@ OBJ: Section {
+@ OBJ: Index:
+@ OBJ: Name: .rel.text
@ OBJ-NEXT: Type: SHT_REL (0x9)
@ OBJ-NEXT: Flags [ (0x0)
@ OBJ-NEXT: ]
@ OBJ-NEXT: Address: 0x0
-@ OBJ-NEXT: Offset: 0x22C
+@ OBJ-NEXT: Offset:
@ OBJ-NEXT: Size: 16
@ OBJ-NEXT: Link: 6
@ OBJ-NEXT: Info: 1
diff --git a/test/MC/ARM/elf-reloc-01.ll b/test/MC/ARM/elf-reloc-01.ll
index 28be85b..7f3cc18 100644
--- a/test/MC/ARM/elf-reloc-01.ll
+++ b/test/MC/ARM/elf-reloc-01.ll
@@ -61,7 +61,7 @@ bb3: ; preds = %bb, %entry
declare void @exit(i32) noreturn nounwind
; OBJ: Relocations [
-; OBJ: Section (2) .rel.text {
+; OBJ: Section {{.*}} .rel.text {
; OBJ: 0x{{[0-9,A-F]+}} R_ARM_MOVW_ABS_NC _MergedGlobals
; OBJ: }
; OBJ: ]
diff --git a/test/MC/ARM/elf-reloc-02.ll b/test/MC/ARM/elf-reloc-02.ll
index 8b4feba..0ffb623 100644
--- a/test/MC/ARM/elf-reloc-02.ll
+++ b/test/MC/ARM/elf-reloc-02.ll
@@ -29,10 +29,10 @@ declare void @myhextochar(i32 %n, i8* nocapture %buffer) nounwind
define i32 @main() nounwind {
entry:
- %0 = tail call i32 (...)* @write(i32 1, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 6) nounwind
- %1 = tail call i32 (...)* @write(i32 1, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str1, i32 0, i32 0), i32 7) nounwind
- %2 = tail call i32 (...)* @write(i32 1, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str2, i32 0, i32 0), i32 12) nounwind
- %3 = tail call i32 (...)* @write(i32 1, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str3, i32 0, i32 0), i32 6) nounwind
+ %0 = tail call i32 (...) @write(i32 1, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 6) nounwind
+ %1 = tail call i32 (...) @write(i32 1, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str1, i32 0, i32 0), i32 7) nounwind
+ %2 = tail call i32 (...) @write(i32 1, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str2, i32 0, i32 0), i32 12) nounwind
+ %3 = tail call i32 (...) @write(i32 1, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str3, i32 0, i32 0), i32 6) nounwind
tail call void @exit(i32 55) noreturn nounwind
unreachable
}
@@ -42,7 +42,7 @@ declare i32 @write(...)
declare void @exit(i32) noreturn nounwind
;; OBJ: Relocations [
-;; OBJ: Section (2) .rel.text {
+;; OBJ: Section {{.*}} .rel.text {
;; OBJ-NEXT: 0x{{[0-9,A-F]+}} R_ARM_MOVW_ABS_NC .L.str
;; OBJ: }
;; OBJ: ]
diff --git a/test/MC/ARM/elf-reloc-03.ll b/test/MC/ARM/elf-reloc-03.ll
index a0fdc3e..4beb91f 100644
--- a/test/MC/ARM/elf-reloc-03.ll
+++ b/test/MC/ARM/elf-reloc-03.ll
@@ -81,7 +81,7 @@ entry:
%0 = load i32, i32* @startval, align 4
%1 = getelementptr inbounds [10 x i32 (...)*], [10 x i32 (...)*]* @vtable, i32 0, i32 %0
%2 = load i32 (...)*, i32 (...)** %1, align 4
- %3 = tail call i32 (...)* %2() nounwind
+ %3 = tail call i32 (...) %2() nounwind
tail call void @exit(i32 %3) noreturn nounwind
unreachable
}
@@ -89,7 +89,7 @@ entry:
declare void @exit(i32) noreturn nounwind
;; OBJ: Relocations [
-;; OBJ: Section (2) .rel.text {
+;; OBJ: Section {{.*}} .rel.text {
;; OBJ: 0x{{[0-9,A-F]+}} R_ARM_MOVW_ABS_NC vtable
;; OBJ: }
;; OBJ: ]
diff --git a/test/MC/ARM/elf-reloc-condcall.s b/test/MC/ARM/elf-reloc-condcall.s
index a0402bd..c4818b8 100644
--- a/test/MC/ARM/elf-reloc-condcall.s
+++ b/test/MC/ARM/elf-reloc-condcall.s
@@ -8,7 +8,7 @@
b some_label
// OBJ: Relocations [
-// OBJ-NEXT: Section (2) .rel.text {
+// OBJ-NEXT: Section {{.*}} .rel.text {
// OBJ-NEXT: 0x0 R_ARM_JUMP24 some_label 0x0
// OBJ-NEXT: 0x4 R_ARM_CALL some_label 0x0
// OBJ-NEXT: 0x8 R_ARM_CALL some_label 0x0
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.ll b/test/MC/ARM/elf-thumbfunc-reloc.ll
index f502739..f35971a 100644
--- a/test/MC/ARM/elf-thumbfunc-reloc.ll
+++ b/test/MC/ARM/elf-thumbfunc-reloc.ll
@@ -29,10 +29,10 @@ entry:
; CHECK: ]
; CHECK: Relocations [
-; CHECK-NEXT: Section (2) .rel.text {
+; CHECK-NEXT: Section {{.*}} .rel.text {
; CHECK-NEXT: 0x8 R_ARM_THM_CALL foo 0x0
; CHECK-NEXT: }
-; CHECK-NEXT: Section (7) .rel.ARM.exidx {
+; CHECK-NEXT: Section {{.*}} .rel.ARM.exidx {
; CHECK-NEXT: 0x0 R_ARM_PREL31 .text 0x0
; CHECK-NEXT: 0x8 R_ARM_PREL31 .text 0x0
; CHECK-NEXT: }
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.s b/test/MC/ARM/elf-thumbfunc-reloc.s
index ea7d507..dd380c3 100644
--- a/test/MC/ARM/elf-thumbfunc-reloc.s
+++ b/test/MC/ARM/elf-thumbfunc-reloc.s
@@ -22,7 +22,7 @@ ptr:
@@ make sure an R_ARM_THM_CALL relocation is generated for the call to g
@CHECK: Relocations [
-@CHECK-NEXT: Section (2) .rel.text {
+@CHECK-NEXT: Section {{.*}} .rel.text {
@CHECK-NEXT: 0x4 R_ARM_THM_CALL g 0x0
@CHECK-NEXT: }
diff --git a/test/MC/ARM/thumb1-relax-adr.s b/test/MC/ARM/thumb1-relax-adr.s
new file mode 100644
index 0000000..80b93ec
--- /dev/null
+++ b/test/MC/ARM/thumb1-relax-adr.s
@@ -0,0 +1,9 @@
+@ RUN: not llvm-mc -triple thumbv6m-none-macho -filetype=obj -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
+@ RUN: not llvm-mc -triple thumbv7m-none-macho -filetype=obj -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
+@ RUN: not llvm-mc -triple thumbv7m-none-eabi -filetype=obj -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
+
+ .global func1
+_func1:
+ adr r0, _func2
+@ CHECK-ERROR: unsupported relocation on symbol
+
diff --git a/test/MC/ARM/thumb1-relax-bcc.s b/test/MC/ARM/thumb1-relax-bcc.s
new file mode 100644
index 0000000..02fde2e
--- /dev/null
+++ b/test/MC/ARM/thumb1-relax-bcc.s
@@ -0,0 +1,12 @@
+@ RUN: not llvm-mc -triple thumbv6m-none-macho -filetype=obj -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
+@ RUN: not llvm-mc -triple thumbv7m-none-macho -filetype=obj -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
+@ RUN: llvm-mc -triple thumbv7m-none-eabi -filetype=obj -o %t %s
+@ RUN: llvm-objdump -d -r -triple thumbv7m-none-eabi %t | FileCheck --check-prefix=CHECK-ELF %s
+
+ .global func1
+_func1:
+ bne _func2
+@ CHECK-ERROR: unsupported relocation on symbol
+
+@ CHECK-ELF: 7f f4 fe af bne.w #-4
+@ CHECK-ELF-NEXT: R_ARM_THM_JUMP24 _func2
diff --git a/test/MC/ARM/thumb1-relax-br.s b/test/MC/ARM/thumb1-relax-br.s
new file mode 100644
index 0000000..92a8275
--- /dev/null
+++ b/test/MC/ARM/thumb1-relax-br.s
@@ -0,0 +1,19 @@
+@ RUN: not llvm-mc -triple thumbv6m-none-macho -filetype=obj -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
+@ RUN: llvm-mc -triple thumbv7m-none-macho -filetype=obj -o %t %s
+@ RUN: llvm-objdump -d -r -triple thumbv7m-none-macho %t | FileCheck --check-prefix=CHECK-MACHO %s
+@ RUN: llvm-mc -triple thumbv7m-none-eabi -filetype=obj -o %t %s
+@ RUN: llvm-objdump -d -r -triple thumbv7m-none-eabi %t | FileCheck --check-prefix=CHECK-ELF %s
+
+ .global func1
+_func1:
+ @ There is no MachO relocation for Thumb1's unconditional branch, so
+ @ this is unrepresentable. FIXME: I think ELF could represent this.
+ b _func2
+
+@ CHECK-ERROR: unsupported relocation on symbol
+
+@ CHECK-MACHO: ff f7 fe bf b.w #-4
+@ CHECK-MACHO-NEXT: ARM_THUMB_RELOC_BR22
+
+@ CHECK-ELF: ff f7 fe bf b.w #-4
+@ CHECK-ELF-NEXT: R_ARM_THM_JUMP24 _func2
diff --git a/test/MC/ARM/thumb1-relax-ldrlit.s b/test/MC/ARM/thumb1-relax-ldrlit.s
new file mode 100644
index 0000000..96c5aa0
--- /dev/null
+++ b/test/MC/ARM/thumb1-relax-ldrlit.s
@@ -0,0 +1,9 @@
+@ RUN: not llvm-mc -triple thumbv6m-none-macho -filetype=obj -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
+@ RUN: not llvm-mc -triple thumbv7m-none-macho -filetype=obj -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
+@ RUN: not llvm-mc -triple thumbv7m-none-eabi -filetype=obj -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
+
+ .global func1
+_func1:
+ ldr r0, _func2
+@ CHECK-ERROR: unsupported relocation on symbol
+
diff --git a/test/MC/ARM/thumb2-bxj-v8.s b/test/MC/ARM/thumb2-bxj-v8.s
new file mode 100644
index 0000000..4420b6f
--- /dev/null
+++ b/test/MC/ARM/thumb2-bxj-v8.s
@@ -0,0 +1,11 @@
+@ RUN: not llvm-mc -triple=thumbv6t2--none-eabi -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=UNDEF
+@ RUN: not llvm-mc -triple=thumbv7a--none-eabi -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=UNDEF
+@ RUN: not llvm-mc -triple=thumbv7r--none-eabi -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=UNDEF
+@ RUN: not llvm-mc -triple=thumbv7m--none-eabi -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=ARM_MODE
+@ RUN: llvm-mc -triple=thumbv8a--none-eabi -show-encoding < %s 2>&1 | FileCheck %s
+
+bxj r13
+
+@ CHECK: bxj sp @ encoding: [0xcd,0xf3,0x00,0x8f]
+@ UNDEF: error: r13 (SP) is an unpredictable operand to BXJ
+@ ARM_MODE: error: instruction requires: arm-mode
diff --git a/test/MC/ARM/thumb2-bxj.s b/test/MC/ARM/thumb2-bxj.s
index e60d1a4..7687939 100644
--- a/test/MC/ARM/thumb2-bxj.s
+++ b/test/MC/ARM/thumb2-bxj.s
@@ -1,8 +1,8 @@
@ RUN: llvm-mc -triple=thumbv6t2--none-eabi -show-encoding < %s | FileCheck %s
@ RUN: llvm-mc -triple=thumbv7a--none-eabi -show-encoding < %s | FileCheck %s
@ RUN: llvm-mc -triple=thumbv7r--none-eabi -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv8a--none-eabi -show-encoding < %s | FileCheck %s
@ RUN: not llvm-mc -triple=thumbv7m--none-eabi -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=UNDEF
-@ RUN: not llvm-mc -triple=thumbv8a--none-eabi -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=UNDEF
bxj r2
diff --git a/test/MC/Disassembler/AArch64/arm64-advsimd.txt b/test/MC/Disassembler/AArch64/arm64-advsimd.txt
index cceee67..ef125c8 100644
--- a/test/MC/Disassembler/AArch64/arm64-advsimd.txt
+++ b/test/MC/Disassembler/AArch64/arm64-advsimd.txt
@@ -169,6 +169,43 @@
# CHECK: ins.h v2[7], v15[3]
# CHECK: ins.b v2[10], v15[5]
+# INS/DUP (non-standard)
+0x60 0x0c 0x08 0x4e
+0x60 0x0c 0x0c 0x4e
+0x60 0x0c 0x0c 0x0e
+0x60 0x0c 0x0e 0x4e
+0x60 0x0c 0x0e 0x0e
+0x60 0x0c 0x0f 0x4e
+0x60 0x0c 0x0f 0x0e
+
+# CHECK: dup.2d v0, x3
+# CHECK: dup.4s v0, w3
+# CHECK: dup.2s v0, w3
+# CHECK: dup.8h v0, w3
+# CHECK: dup.4h v0, w3
+# CHECK: dup.16b v0, w3
+# CHECK: dup.8b v0, w3
+
+0xe2 0x75 0x18 0x6e
+0xe2 0x35 0x0c 0x6e
+0xe2 0x15 0x06 0x6e
+0xe2 0x0d 0x03 0x6e
+
+0xe2 0x05 0x18 0x6e
+0xe2 0x55 0x1c 0x6e
+0xe2 0x35 0x1e 0x6e
+0xe2 0x2d 0x15 0x6e
+
+# CHECK: ins.d v2[1], v15[1]
+# CHECK: ins.s v2[1], v15[1]
+# CHECK: ins.h v2[1], v15[1]
+# CHECK: ins.b v2[1], v15[1]
+
+# CHECK: ins.d v2[1], v15[0]
+# CHECK: ins.s v2[3], v15[2]
+# CHECK: ins.h v2[7], v15[3]
+# CHECK: ins.b v2[10], v15[5]
+
0x00 0x1c 0x20 0x0e
0x00 0x1c 0x20 0x4e
diff --git a/test/MC/Disassembler/AArch64/armv8.1a-lor.txt b/test/MC/Disassembler/AArch64/armv8.1a-lor.txt
new file mode 100644
index 0000000..5f8e725
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/armv8.1a-lor.txt
@@ -0,0 +1,28 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a --disassemble < %s | FileCheck %s
+
+0x20,0x7c,0xdf,0x08
+0x20,0x7c,0xdf,0x48
+0x20,0x7c,0xdf,0x88
+0x20,0x7c,0xdf,0xc8
+0x20,0x7c,0x9f,0x08
+0x20,0x7c,0x9f,0x48
+0x20,0x7c,0x9f,0x88
+0x20,0x7c,0x9f,0xc8
+# CHECK: ldlarb w0, [x1]
+# CHECK: ldlarh w0, [x1]
+# CHECK: ldlar w0, [x1]
+# CHECK: ldlar x0, [x1]
+# CHECK: stllrb w0, [x1]
+# CHECK: stllrh w0, [x1]
+# CHECK: stllr w0, [x1]
+# CHECK: stllr x0, [x1]
+0x00,0xa4,0x18,0xd5
+0x20,0xa4,0x18,0xd5
+0x40,0xa4,0x18,0xd5
+0x60,0xa4,0x18,0xd5
+0xe0,0xa4,0x38,0xd5
+# CHECK: msr LORSA_EL1, x0
+# CHECK: msr LOREA_EL1, x0
+# CHECK: msr LORN_EL1, x0
+# CHECK: msr LORC_EL1, x0
+# CHECK: mrs x0, LORID_EL1
diff --git a/test/MC/Disassembler/AArch64/armv8.1a-pan.txt b/test/MC/Disassembler/AArch64/armv8.1a-pan.txt
new file mode 100644
index 0000000..2af5c2a
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/armv8.1a-pan.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a --disassemble < %s | FileCheck %s
+
+0x9f,0x40,0x00,0xd5
+0x9f,0x41,0x00,0xd5
+0x65,0x42,0x18,0xd5
+0x6d,0x42,0x38,0xd5
+# CHECK: msr PAN, #0
+# CHECK: msr PAN, #1
+# CHECK: msr PAN, x5
+# CHECK: mrs x13, PAN
diff --git a/test/MC/Disassembler/AArch64/armv8.1a-rdma.txt b/test/MC/Disassembler/AArch64/armv8.1a-rdma.txt
new file mode 100644
index 0000000..5e1272f
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/armv8.1a-rdma.txt
@@ -0,0 +1,129 @@
+# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a --disassemble < %s 2>&1 | FileCheck %s
+
+[0x20,0x84,0x02,0x2e] # sqrdmlah v0.8b, v1.8b, v2.8b
+[0x20,0x8c,0x02,0x2e] # sqrdmlsh v0.8b, v1.8b, v2.8b
+[0x20,0x84,0xc2,0x2e] # sqrdmlah v0.1d, v1.1d, v2.1d
+[0x20,0x8c,0xc2,0x2e] # sqrdmlsh v0.1d, v1.1d, v2.1d
+[0x20,0x84,0x02,0x6e] # sqrdmlah v0.16b, v1.16b, v2.16b
+[0x20,0x8c,0x02,0x6e] # sqrdmlsh v0.16b, v1.16b, v2.16b
+[0x20,0x84,0xc2,0x6e] # sqrdmlah v0.2d, v1.2d, v2.2d
+[0x20,0x8c,0xc2,0x6e] # sqrdmlsh v0.2d, v1.2d, v2.2d
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x84,0x02,0x2e]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x8c,0x02,0x2e]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x84,0xc2,0x2e]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x8c,0xc2,0x2e]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x84,0x02,0x6e]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x8c,0x02,0x6e]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x84,0xc2,0x6e]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x8c,0xc2,0x6e]
+
+[0x20,0x84,0x02,0x7e] # sqrdmlah b0, b1, b2
+[0x20,0x8c,0x02,0x7e] # sqrdmlsh b0, b1, b2
+[0x20,0x84,0xc2,0x7e] # sqrdmlah d0, d1, d2
+[0x20,0x8c,0xc2,0x7e] # sqrdmlsh d0, d1, d2
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x84,0x02,0x7e]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x8c,0x02,0x7e]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x84,0xc2,0x7e]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x8c,0xc2,0x7e]
+
+[0x20,0xd0,0x32,0x2f] # sqrdmlah v0.8b, v1.8b, v2.b[3]
+[0x20,0xf0,0x32,0x2f] # sqrdmlsh v0.8b, v1.8b, v2.b[3]
+[0x20,0xd0,0xe2,0x2f] # sqrdmlah v0.1d, v1.1d, v2.d[1]
+[0x20,0xf0,0xe2,0x2f] # sqrdmlsh v0.1d, v1.1d, v2.d[1]
+[0x20,0xd0,0x32,0x6f] # sqrdmlah v0.16b, v1.16b, v2.b[3]
+[0x20,0xf0,0x32,0x6f] # sqrdmlsh v0.16b, v1.16b, v2.b[3]
+[0x20,0xd8,0xe2,0x6f] # sqrdmlah v0.2d, v1.2d, v2.d[3]
+[0x20,0xf8,0xe2,0x6f] # sqrdmlsh v0.2d, v1.2d, v2.d[3]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0xd0,0x32,0x2f]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0xf0,0x32,0x2f]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0xd0,0xe2,0x2f]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0xf0,0xe2,0x2f]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0xd0,0x32,0x6f]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0xf0,0x32,0x6f]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0xd8,0xe2,0x6f]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0xf8,0xe2,0x6f]
+
+[0x20,0xd0,0x32,0x7f] # sqrdmlah b0, b1, v2.b[3]
+[0x20,0xf0,0x32,0x7f] # sqrdmlsh b0, b1, v2.b[3]
+[0x20,0xd8,0xe2,0x7f] # sqrdmlah d0, d1, v2.d[3]
+[0x20,0xf8,0xe2,0x7f] # sqrdmlsh d0, d1, v2.d[3]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0xd0,0x32,0x7f]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0xf0,0x32,0x7f]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0xd8,0xe2,0x7f]
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0xf8,0xe2,0x7f]
+
+[0x20,0x84,0x42,0x2e]
+[0x20,0x8c,0x42,0x2e]
+[0x20,0x84,0x82,0x2e]
+[0x20,0x8c,0x82,0x2e]
+[0x20,0x84,0x42,0x6e]
+[0x20,0x8c,0x42,0x6e]
+[0x20,0x84,0x82,0x6e]
+[0x20,0x8c,0x82,0x6e]
+# CHECK: sqrdmlah v0.4h, v1.4h, v2.4h
+# CHECK: sqrdmlsh v0.4h, v1.4h, v2.4h
+# CHECK: sqrdmlah v0.2s, v1.2s, v2.2s
+# CHECK: sqrdmlsh v0.2s, v1.2s, v2.2s
+# CHECK: sqrdmlah v0.8h, v1.8h, v2.8h
+# CHECK: sqrdmlsh v0.8h, v1.8h, v2.8h
+# CHECK: sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK: sqrdmlsh v0.4s, v1.4s, v2.4s
+
+[0x20,0x84,0x42,0x7e]
+[0x20,0x8c,0x42,0x7e]
+[0x20,0x84,0x82,0x7e]
+[0x20,0x8c,0x82,0x7e]
+# CHECK: sqrdmlah h0, h1, h2
+# CHECK: sqrdmlsh h0, h1, h2
+# CHECK: sqrdmlah s0, s1, s2
+# CHECK: sqrdmlsh s0, s1, s2
+
+0x20,0xd0,0x72,0x2f
+0x20,0xf0,0x72,0x2f
+0x20,0xd0,0xa2,0x2f
+0x20,0xf0,0xa2,0x2f
+0x20,0xd0,0x72,0x6f
+0x20,0xf0,0x72,0x6f
+0x20,0xd8,0xa2,0x6f
+0x20,0xf8,0xa2,0x6f
+# CHECK: sqrdmlah v0.4h, v1.4h, v2.h[3]
+# CHECK: sqrdmlsh v0.4h, v1.4h, v2.h[3]
+# CHECK: sqrdmlah v0.2s, v1.2s, v2.s[1]
+# CHECK: sqrdmlsh v0.2s, v1.2s, v2.s[1]
+# CHECK: sqrdmlah v0.8h, v1.8h, v2.h[3]
+# CHECK: sqrdmlsh v0.8h, v1.8h, v2.h[3]
+# CHECK: sqrdmlah v0.4s, v1.4s, v2.s[3]
+# CHECK: sqrdmlsh v0.4s, v1.4s, v2.s[3]
+
+0x20,0xd0,0x72,0x7f
+0x20,0xf0,0x72,0x7f
+0x20,0xd8,0xa2,0x7f
+0x20,0xf8,0xa2,0x7f
+# CHECK: sqrdmlah h0, h1, v2.h[3]
+# CHECK: sqrdmlsh h0, h1, v2.h[3]
+# CHECK: sqrdmlah s0, s1, v2.s[3]
+# CHECK: sqrdmlsh s0, s1, v2.s[3]
diff --git a/test/MC/Disassembler/AArch64/armv8.1a-vhe.txt b/test/MC/Disassembler/AArch64/armv8.1a-vhe.txt
new file mode 100644
index 0000000..e4bf59c
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/armv8.1a-vhe.txt
@@ -0,0 +1,56 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a --disassemble < %s | FileCheck %s
+
+0x20,0x20,0x1c,0xd5
+0x20,0xd0,0x1c,0xd5
+0x00,0xe3,0x1c,0xd5
+0x40,0xe3,0x1c,0xd5
+0x20,0xe3,0x1c,0xd5
+0x00,0x10,0x1d,0xd5
+0x40,0x10,0x1d,0xd5
+0x00,0x20,0x1d,0xd5
+0x20,0x20,0x1d,0xd5
+0x40,0x20,0x1d,0xd5
+0x00,0x51,0x1d,0xd5
+0x20,0x51,0x1d,0xd5
+0x00,0x52,0x1d,0xd5
+0x00,0x60,0x1d,0xd5
+0x00,0xa2,0x1d,0xd5
+0x00,0xa3,0x1d,0xd5
+0x00,0xc0,0x1d,0xd5
+0x20,0xd0,0x1d,0xd5
+0x00,0xe1,0x1d,0xd5
+0x00,0xe2,0x1d,0xd5
+0x20,0xe2,0x1d,0xd5
+0x40,0xe2,0x1d,0xd5
+0x00,0xe3,0x1d,0xd5
+0x20,0xe3,0x1d,0xd5
+0x40,0xe3,0x1d,0xd5
+0x00,0x40,0x1d,0xd5
+0x20,0x40,0x1d,0xd5
+# CHECK: msr TTBR1_EL2, x0
+# CHECK: msr CONTEXTIDR_EL2, x0
+# CHECK: msr CNTHV_TVAL_EL2, x0
+# CHECK: msr CNTHV_CVAL_EL2, x0
+# CHECK: msr CNTHV_CTL_EL2, x0
+# CHECK: msr SCTLR_EL12, x0
+# CHECK: msr CPACR_EL12, x0
+# CHECK: msr TTBR0_EL12, x0
+# CHECK: msr TTBR1_EL12, x0
+# CHECK: msr TCR_EL12, x0
+# CHECK: msr AFSR0_EL12, x0
+# CHECK: msr AFSR1_EL12, x0
+# CHECK: msr ESR_EL12, x0
+# CHECK: msr FAR_EL12, x0
+# CHECK: msr MAIR_EL12, x0
+# CHECK: msr AMAIR_EL12, x0
+# CHECK: msr VBAR_EL12, x0
+# CHECK: msr CONTEXTIDR_EL12, x0
+# CHECK: msr CNTKCTL_EL12, x0
+# CHECK: msr CNTP_TVAL_EL02, x0
+# CHECK: msr CNTP_CTL_EL02, x0
+# CHECK: msr CNTP_CVAL_EL02, x0
+# CHECK: msr CNTV_TVAL_EL02, x0
+# CHECK: msr CNTV_CTL_EL02, x0
+# CHECK: msr CNTV_CVAL_EL02, x0
+# CHECK: msr SPSR_EL12, x0
+# CHECK: msr ELR_EL12, x0
diff --git a/test/MC/Disassembler/ARM/armv8.1a.txt b/test/MC/Disassembler/ARM/armv8.1a.txt
index de0c89e..929643b 100644
--- a/test/MC/Disassembler/ARM/armv8.1a.txt
+++ b/test/MC/Disassembler/ARM/armv8.1a.txt
@@ -34,3 +34,19 @@
# CHECK-V8: [0x42,0x0f,0x92,0xf3]
# CHECK-V8: warning: invalid instruction encoding
# CHECK-V8: [0x42,0x0f,0xa1,0xf2]
+
+# The SETPAN(v8.1a) and TST(v8) instructions occupy the same space, but SETPAN
+# uses the encoding for the invalid NV predicate operand. This test checks that
+# the disassembler is correctly disambiguating and decoding these instructions.
+
+[0x00 0x00 0x10 0xf1]
+# CHECK: setpan #0
+
+[0x00 0x02 0x10 0xf1]
+# CHECK: setpan #1
+
+[0x00 0x00 0x10 0xe1]
+# CHECK: tst r0, r0
+
+[0x00 0x02 0x10 0xe1]
+# CHECK: tst r0, r0, lsl #4
diff --git a/test/MC/Disassembler/ARM/thumb-v8.1a.txt b/test/MC/Disassembler/ARM/thumb-v8.1a.txt
index 10fea46..3de8c27 100644
--- a/test/MC/Disassembler/ARM/thumb-v8.1a.txt
+++ b/test/MC/Disassembler/ARM/thumb-v8.1a.txt
@@ -96,3 +96,15 @@
# CHECK-V8: warning: invalid instruction encoding
# CHECK-V8: [0xa2,0xff,0x42,0x0f]
# CHECK-V8: ^
+
+[0x10,0xb6]
+# CHECK-V81a: setpan #0
+# CHECK-V8: warning: invalid instruction encoding
+# CHECK-V8: [0x10,0xb6]
+# CHECK-V8: ^
+
+[0x18,0xb6]
+# CHECK-V81a: setpan #1
+# CHECK-V8: warning: invalid instruction encoding
+# CHECK-V8: [0x18,0xb6]
+# CHECK-V8: ^
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
index e99d49b..f9cdb51 100644
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
@@ -328,6 +328,18 @@
# CHECK: divwu. 2, 3, 4
0x7c 0x43 0x23 0x97
+# CHECK: divwe 2, 3, 4
+0x7c 0x43 0x23 0x56
+
+# CHECK: divwe. 2, 3, 4
+0x7c 0x43 0x23 0x57
+
+# CHECK: divweu 2, 3, 4
+0x7c 0x43 0x23 0x16
+
+# CHECK: divweu. 2, 3, 4
+0x7c 0x43 0x23 0x17
+
# CHECK: mulld 2, 3, 4
0x7c 0x43 0x21 0xd2
@@ -358,6 +370,18 @@
# CHECK: divdu. 2, 3, 4
0x7c 0x43 0x23 0x93
+# CHECK: divde 2, 3, 4
+0x7c 0x43 0x23 0x52
+
+# CHECK: divde. 2, 3, 4
+0x7c 0x43 0x23 0x53
+
+# CHECK: divdeu 2, 3, 4
+0x7c 0x43 0x23 0x12
+
+# CHECK: divdeu. 2, 3, 4
+0x7c 0x43 0x23 0x13
+
# CHECK: cmpdi 2, 3, 128
0x2d 0x23 0x00 0x80
@@ -499,6 +523,9 @@
# CHECK: popcntd 2, 3
0x7c 0x62 0x03 0xf4
+# CHECK: bpermd 2, 3, 4
+0x7c 0x62 0x21 0xf8
+
# CHECK: cmpb 7, 21, 4
0x7e 0xa7 0x23 0xf8
diff --git a/test/MC/Disassembler/PowerPC/vsx.txt b/test/MC/Disassembler/PowerPC/vsx.txt
index 5e65482..1b91b34 100644
--- a/test/MC/Disassembler/PowerPC/vsx.txt
+++ b/test/MC/Disassembler/PowerPC/vsx.txt
@@ -459,3 +459,17 @@
# CHECK: xxpermdi 7, 63, 63, 2
0xf0 0xff 0xfa 0x56
+# CHECK: mfvsrd 3, 0
+0x7c 0x03 0x00 0x66
+
+# CHECK: mfvsrwz 5, 0
+0x7c 0x05 0x00 0xe6
+
+# CHECK: mtvsrd 0, 3
+0x7c 0x03 0x01 0x66
+
+# CHECK: mtvsrwa 0, 3
+0x7c 0x03 0x01 0xa6
+
+# CHECK: mtvsrwz 0, 3
+0x7c 0x03 0x01 0xe6
diff --git a/test/MC/Disassembler/SystemZ/insns.txt b/test/MC/Disassembler/SystemZ/insns.txt
index 54a3c5b..9d3f2b0 100644
--- a/test/MC/Disassembler/SystemZ/insns.txt
+++ b/test/MC/Disassembler/SystemZ/insns.txt
@@ -2503,6 +2503,15 @@
# CHECK: ear %r15, %a15
0xb2 0x4f 0x00 0xff
+# CHECK: etnd %r0
+0xb2 0xec 0x00 0x00
+
+# CHECK: etnd %r15
+0xb2 0xec 0x00 0xf0
+
+# CHECK: etnd %r7
+0xb2 0xec 0x00 0x70
+
# CHECK: fidbr %f0, 0, %f0
0xb3 0x5f 0x00 0x00
@@ -6034,6 +6043,36 @@
# CHECK: ny %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x54
+# CHECK: ntstg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x25
+
+# CHECK: ntstg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x25
+
+# CHECK: ntstg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x25
+
+# CHECK: ntstg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x25
+
+# CHECK: ntstg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x25
+
+# CHECK: ntstg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x25
+
+# CHECK: ntstg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x25
+
+# CHECK: ntstg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x25
+
+# CHECK: ntstg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x25
+
+# CHECK: ntstg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x25
+
# CHECK: oc 0(1), 0
0xd6 0x00 0x00 0x00 0x00 0x00
@@ -6334,6 +6373,33 @@
# CHECK: pfd 15, 0
0xe3 0xf0 0x00 0x00 0x00 0x36
+# CHECK: popcnt %r0, %r0
+0xb9 0xe1 0x00 0x00
+
+# CHECK: popcnt %r0, %r15
+0xb9 0xe1 0x00 0x0f
+
+# CHECK: popcnt %r15, %r0
+0xb9 0xe1 0x00 0xf0
+
+# CHECK: popcnt %r7, %r8
+0xb9 0xe1 0x00 0x78
+
+# CHECK: ppa %r0, %r0, 0
+0xb2 0xe8 0x00 0x00
+
+# CHECK: ppa %r0, %r0, 15
+0xb2 0xe8 0xf0 0x00
+
+# CHECK: ppa %r0, %r15, 0
+0xb2 0xe8 0x00 0x0f
+
+# CHECK: ppa %r4, %r6, 7
+0xb2 0xe8 0x70 0x46
+
+# CHECK: ppa %r15, %r0, 0
+0xb2 0xe8 0x00 0xf0
+
# CHECK: risbg %r0, %r0, 0, 0, 0
0xec 0x00 0x00 0x00 0x00 0x55
@@ -6355,6 +6421,27 @@
# CHECK: risbg %r4, %r5, 6, 7, 8
0xec 0x45 0x06 0x07 0x08 0x55
+# CHECK: risbgn %r0, %r0, 0, 0, 0
+0xec 0x00 0x00 0x00 0x00 0x59
+
+# CHECK: risbgn %r0, %r0, 0, 0, 63
+0xec 0x00 0x00 0x00 0x3f 0x59
+
+# CHECK: risbgn %r0, %r0, 0, 255, 0
+0xec 0x00 0x00 0xff 0x00 0x59
+
+# CHECK: risbgn %r0, %r0, 255, 0, 0
+0xec 0x00 0xff 0x00 0x00 0x59
+
+# CHECK: risbgn %r0, %r15, 0, 0, 0
+0xec 0x0f 0x00 0x00 0x00 0x59
+
+# CHECK: risbgn %r15, %r0, 0, 0, 0
+0xec 0xf0 0x00 0x00 0x00 0x59
+
+# CHECK: risbgn %r4, %r5, 6, 7, 8
+0xec 0x45 0x06 0x07 0x08 0x59
+
# CHECK: risbhg %r0, %r0, 0, 0, 0
0xec 0x00 0x00 0x00 0x00 0x5d
@@ -8029,6 +8116,93 @@
# CHECK: sy %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x5b
+# CHECK: tabort 0
+0xb2 0xfc 0x00 0x00
+
+# CHECK: tabort 0(%r1)
+0xb2 0xfc 0x10 0x00
+
+# CHECK: tabort 0(%r15)
+0xb2 0xfc 0xf0 0x00
+
+# CHECK: tabort 4095
+0xb2 0xfc 0x0f 0xff
+
+# CHECK: tabort 4095(%r1)
+0xb2 0xfc 0x1f 0xff
+
+# CHECK: tabort 4095(%r15)
+0xb2 0xfc 0xff 0xff
+
+# CHECK: tbegin 0, 0
+0xe5 0x60 0x00 0x00 0x00 0x00
+
+# CHECK: tbegin 4095, 0
+0xe5 0x60 0x0f 0xff 0x00 0x00
+
+# CHECK: tbegin 0, 0
+0xe5 0x60 0x00 0x00 0x00 0x00
+
+# CHECK: tbegin 0, 1
+0xe5 0x60 0x00 0x00 0x00 0x01
+
+# CHECK: tbegin 0, 32767
+0xe5 0x60 0x00 0x00 0x7f 0xff
+
+# CHECK: tbegin 0, 32768
+0xe5 0x60 0x00 0x00 0x80 0x00
+
+# CHECK: tbegin 0, 65535
+0xe5 0x60 0x00 0x00 0xff 0xff
+
+# CHECK: tbegin 0(%r1), 42
+0xe5 0x60 0x10 0x00 0x00 0x2a
+
+# CHECK: tbegin 0(%r15), 42
+0xe5 0x60 0xf0 0x00 0x00 0x2a
+
+# CHECK: tbegin 4095(%r1), 42
+0xe5 0x60 0x1f 0xff 0x00 0x2a
+
+# CHECK: tbegin 4095(%r15), 42
+0xe5 0x60 0xff 0xff 0x00 0x2a
+
+# CHECK: tbeginc 0, 0
+0xe5 0x61 0x00 0x00 0x00 0x00
+
+# CHECK: tbeginc 4095, 0
+0xe5 0x61 0x0f 0xff 0x00 0x00
+
+# CHECK: tbeginc 0, 0
+0xe5 0x61 0x00 0x00 0x00 0x00
+
+# CHECK: tbeginc 0, 1
+0xe5 0x61 0x00 0x00 0x00 0x01
+
+# CHECK: tbeginc 0, 32767
+0xe5 0x61 0x00 0x00 0x7f 0xff
+
+# CHECK: tbeginc 0, 32768
+0xe5 0x61 0x00 0x00 0x80 0x00
+
+# CHECK: tbeginc 0, 65535
+0xe5 0x61 0x00 0x00 0xff 0xff
+
+# CHECK: tbeginc 0(%r1), 42
+0xe5 0x61 0x10 0x00 0x00 0x2a
+
+# CHECK: tbeginc 0(%r15), 42
+0xe5 0x61 0xf0 0x00 0x00 0x2a
+
+# CHECK: tbeginc 4095(%r1), 42
+0xe5 0x61 0x1f 0xff 0x00 0x2a
+
+# CHECK: tbeginc 4095(%r15), 42
+0xe5 0x61 0xff 0xff 0x00 0x2a
+
+# CHECK: tend
+0xb2 0xf8 0x00 0x00
+
# CHECK: tm 0, 0
0x91 0x00 0x00 0x00
diff --git a/test/MC/Disassembler/X86/x86-16.txt b/test/MC/Disassembler/X86/x86-16.txt
index 93974d4..c6844cd 100644
--- a/test/MC/Disassembler/X86/x86-16.txt
+++ b/test/MC/Disassembler/X86/x86-16.txt
@@ -30,7 +30,7 @@
# CHECK: movl %eax, -16(%ebp)
0x67 0x66 0x89 0x45 0xf0
-# CHECK: testb %bl, %cl
+# CHECK: testb %cl, %bl
0x84 0xcb
# CHECK: cmpl %eax, %ebx
diff --git a/test/MC/ELF/alias.s b/test/MC/ELF/alias.s
index 78df737..0ab6dd4 100644
--- a/test/MC/ELF/alias.s
+++ b/test/MC/ELF/alias.s
@@ -24,6 +24,15 @@ bar5 = bar4
bar6 = bar5
bar6:
+// Test that indirect local aliases do not appear as symbols.
+.data
+.Llocal:
+
+.text
+leaq .Llocal1(%rip), %rdi
+.Llocal1 = .Llocal2
+.Llocal2 = .Llocal
+
// CHECK: Symbols [
// CHECK-NEXT: Symbol {
// CHECK-NEXT: Name: (0)
diff --git a/test/MC/ELF/basic-elf-32.s b/test/MC/ELF/basic-elf-32.s
index e12fc52..16266af 100644
--- a/test/MC/ELF/basic-elf-32.s
+++ b/test/MC/ELF/basic-elf-32.s
@@ -45,7 +45,7 @@ main: # @main
// CHECK: Name: .rel.text
// CHECK: Relocations [
-// CHECK: Section (2) .rel.text {
+// CHECK: Section {{.*}} .rel.text {
// CHECK: 0x6 R_386_32 .L.str1
// CHECK: 0xB R_386_PC32 puts
// CHECK: 0x12 R_386_32 .L.str2
diff --git a/test/MC/ELF/basic-elf-64.s b/test/MC/ELF/basic-elf-64.s
index a77f3e6..d99125e 100644
--- a/test/MC/ELF/basic-elf-64.s
+++ b/test/MC/ELF/basic-elf-64.s
@@ -45,7 +45,7 @@ main: # @main
// CHECK: Name: .rela.text
// CHECK: Relocations [
-// CHECK: Section (2) .rela.text {
+// CHECK: Section {{.*}} .rela.text {
// CHECK: 0x5 R_X86_64_32 .rodata.str1.1 0x0
// CHECK: 0xA R_X86_64_PC32 puts 0xFFFFFFFFFFFFFFFC
// CHECK: 0xF R_X86_64_32 .rodata.str1.1 0x6
diff --git a/test/MC/ELF/cfi-adjust-cfa-offset.s b/test/MC/ELF/cfi-adjust-cfa-offset.s
index 200f897..bbaa785 100644
--- a/test/MC/ELF/cfi-adjust-cfa-offset.s
+++ b/test/MC/ELF/cfi-adjust-cfa-offset.s
@@ -53,7 +53,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x3C8
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 72
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-advance-loc2.s b/test/MC/ELF/cfi-advance-loc2.s
index 98caa01..be14a43 100644
--- a/test/MC/ELF/cfi-advance-loc2.s
+++ b/test/MC/ELF/cfi-advance-loc2.s
@@ -38,7 +38,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x490
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-def-cfa-offset.s b/test/MC/ELF/cfi-def-cfa-offset.s
index 59f7400..d84ab82 100644
--- a/test/MC/ELF/cfi-def-cfa-offset.s
+++ b/test/MC/ELF/cfi-def-cfa-offset.s
@@ -40,7 +40,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x398
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-def-cfa-register.s b/test/MC/ELF/cfi-def-cfa-register.s
index 178ba32..1efe0b2 100644
--- a/test/MC/ELF/cfi-def-cfa-register.s
+++ b/test/MC/ELF/cfi-def-cfa-register.s
@@ -35,7 +35,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x390
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-def-cfa.s b/test/MC/ELF/cfi-def-cfa.s
index dfb0d4b..5e185d5 100644
--- a/test/MC/ELF/cfi-def-cfa.s
+++ b/test/MC/ELF/cfi-def-cfa.s
@@ -35,7 +35,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x390
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-escape.s b/test/MC/ELF/cfi-escape.s
index 5394ee4..5c427d6 100644
--- a/test/MC/ELF/cfi-escape.s
+++ b/test/MC/ELF/cfi-escape.s
@@ -36,7 +36,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x390
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-offset.s b/test/MC/ELF/cfi-offset.s
index a65b4fc..ad73b50 100644
--- a/test/MC/ELF/cfi-offset.s
+++ b/test/MC/ELF/cfi-offset.s
@@ -35,7 +35,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x390
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-register.s b/test/MC/ELF/cfi-register.s
index 9441770..48fd879 100644
--- a/test/MC/ELF/cfi-register.s
+++ b/test/MC/ELF/cfi-register.s
@@ -36,7 +36,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x390
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-rel-offset.s b/test/MC/ELF/cfi-rel-offset.s
index 0dc69c8..15e1f9e 100644
--- a/test/MC/ELF/cfi-rel-offset.s
+++ b/test/MC/ELF/cfi-rel-offset.s
@@ -44,7 +44,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x3A0
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-rel-offset2.s b/test/MC/ELF/cfi-rel-offset2.s
index 360e7b0..53c2075 100644
--- a/test/MC/ELF/cfi-rel-offset2.s
+++ b/test/MC/ELF/cfi-rel-offset2.s
@@ -35,7 +35,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x390
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-remember.s b/test/MC/ELF/cfi-remember.s
index 3a38948..d9de0ff 100644
--- a/test/MC/ELF/cfi-remember.s
+++ b/test/MC/ELF/cfi-remember.s
@@ -38,7 +38,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x390
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-restore.s b/test/MC/ELF/cfi-restore.s
index e225797..19e1624 100644
--- a/test/MC/ELF/cfi-restore.s
+++ b/test/MC/ELF/cfi-restore.s
@@ -36,7 +36,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x390
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-same-value.s b/test/MC/ELF/cfi-same-value.s
index 2d37f4d..53da4dc 100644
--- a/test/MC/ELF/cfi-same-value.s
+++ b/test/MC/ELF/cfi-same-value.s
@@ -36,7 +36,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x390
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-undefined.s b/test/MC/ELF/cfi-undefined.s
index 568b315..7afeea6 100644
--- a/test/MC/ELF/cfi-undefined.s
+++ b/test/MC/ELF/cfi-undefined.s
@@ -36,7 +36,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x390
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-window-save.s b/test/MC/ELF/cfi-window-save.s
index b083901..00fd37f 100644
--- a/test/MC/ELF/cfi-window-save.s
+++ b/test/MC/ELF/cfi-window-save.s
@@ -38,7 +38,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x390
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi-zero-addr-delta.s b/test/MC/ELF/cfi-zero-addr-delta.s
index 8662839..85aa3e1 100644
--- a/test/MC/ELF/cfi-zero-addr-delta.s
+++ b/test/MC/ELF/cfi-zero-addr-delta.s
@@ -43,7 +43,7 @@ f:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x398
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/cfi.s b/test/MC/ELF/cfi.s
index 21be615..dc61dca 100644
--- a/test/MC/ELF/cfi.s
+++ b/test/MC/ELF/cfi.s
@@ -355,7 +355,7 @@ f37:
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0xE70
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 1752
// CHECK-NEXT: Link: 7
// CHECK-NEXT: Info: 4
diff --git a/test/MC/ELF/common.s b/test/MC/ELF/common.s
index bd96564..b7e6ba0 100644
--- a/test/MC/ELF/common.s
+++ b/test/MC/ELF/common.s
@@ -45,7 +45,7 @@
// CHECK-NEXT: Binding: Local
// CHECK-NEXT: Type: Object
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .bss (0x4)
+// CHECK-NEXT: Section: .bss
// CHECK-NEXT: }
diff --git a/test/MC/ELF/compression.s b/test/MC/ELF/compression.s
index 07b689e..5560ba7 100644
--- a/test/MC/ELF/compression.s
+++ b/test/MC/ELF/compression.s
@@ -10,7 +10,6 @@
// Check for the 'ZLIB' file magic at the start of the section only
// CHECK-NEXT: ZLIB
// CHECK-NOT: ZLIB
-// CHECK: Contents of
// Don't compress small sections, such as this simple debug_abbrev example
// CHECK: Contents of section .debug_abbrev:
diff --git a/test/MC/ELF/debug-line.s b/test/MC/ELF/debug-line.s
index 38ef828..072265c 100644
--- a/test/MC/ELF/debug-line.s
+++ b/test/MC/ELF/debug-line.s
@@ -17,7 +17,7 @@
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x50
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 57
// CHECK-NEXT: Link: 0
// CHECK-NEXT: Info: 0
diff --git a/test/MC/ELF/ifunc-reloc.s b/test/MC/ELF/ifunc-reloc.s
index 0195463..6f1d79b 100644
--- a/test/MC/ELF/ifunc-reloc.s
+++ b/test/MC/ELF/ifunc-reloc.s
@@ -10,7 +10,7 @@ alias:
callq sym
// CHECK: Relocations [
-// CHECK-NEXT: Section (2) .rela.text {
+// CHECK-NEXT: Section {{.*}} .rela.text {
// CHECK-NEXT: 0x1 R_X86_64_PC32 sym 0xFFFFFFFFFFFFFFFC
// CHECK-NEXT: }
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/local-reloc.s b/test/MC/ELF/local-reloc.s
index 19b9509..ce0b674 100644
--- a/test/MC/ELF/local-reloc.s
+++ b/test/MC/ELF/local-reloc.s
@@ -7,7 +7,7 @@
foo:
// CHECK: Relocations [
-// CHECK: Section (2) .rela.text {
+// CHECK: Section {{.*}} .rela.text {
// CHECK-NEXT: 0x{{[^ ]+}} R_X86_64_32S .text 0x{{[^ ]+}}
// CHECK-NEXT: }
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/merge.s b/test/MC/ELF/merge.s
index d6e0b7c..e787728 100644
--- a/test/MC/ELF/merge.s
+++ b/test/MC/ELF/merge.s
@@ -21,7 +21,7 @@ zed:
foo:
// CHECK: Relocations [
-// CHECK-NEXT: Section (2) .rela.text {
+// CHECK-NEXT: Section {{.*}} .rela.text {
// CHECK-NEXT: 0x{{[^ ]+}} R_X86_64_PC32 .Lfoo 0x{{[^ ]+}}
// CHECK-NEXT: 0x{{[^ ]+}} R_X86_64_32 .sec1 0x{{[^ ]+}}
// CHECK-NEXT: 0x{{[^ ]+}} R_X86_64_32 .Lfoo 0x{{[^ ]+}}
diff --git a/test/MC/ELF/pr19582.s b/test/MC/ELF/pr19582.s
new file mode 100644
index 0000000..304cacb
--- /dev/null
+++ b/test/MC/ELF/pr19582.s
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s
+
+a:
+ .section foo
+ c = b
+b:
+ // CHECK: 0x0 R_X86_64_PC32 .text 0x0
+ .long a - c
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
index b998ea5..2af6add 100644
--- a/test/MC/ELF/relocation-386.s
+++ b/test/MC/ELF/relocation-386.s
@@ -4,7 +4,7 @@
// correctly point to the section or the symbol.
// CHECK: Relocations [
-// CHECK-NEXT: Section (2) .rel.text {
+// CHECK-NEXT: Section {{.*}} .rel.text {
// CHECK-NEXT: 0x2 R_386_GOTOFF .Lfoo 0x0
// CHECK-NEXT: 0x{{[^ ]+}} R_386_PLT32 bar2 0x0
// CHECK-NEXT: 0x{{[^ ]+}} R_386_GOTPC _GLOBAL_OFFSET_TABLE_ 0x0
@@ -79,7 +79,7 @@
// CHECK-NEXT: Binding: Local
// CHECK-NEXT: Type: TLS
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: zedsec (0x5)
+// CHECK-NEXT: Section: zedsec
// CHECK-NEXT: }
// Symbol 7 is section 4
// CHECK: Symbol {
@@ -89,7 +89,7 @@
// CHECK-NEXT: Binding: Local
// CHECK-NEXT: Type: Section
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .bss (0x4)
+// CHECK-NEXT: Section: .bss
// CHECK-NEXT: }
.text
diff --git a/test/MC/ELF/relocation-pc.s b/test/MC/ELF/relocation-pc.s
index 0ce3201..a8783a7 100644
--- a/test/MC/ELF/relocation-pc.s
+++ b/test/MC/ELF/relocation-pc.s
@@ -13,13 +13,13 @@
// CHECK-NEXT: }
// CHECK: Section {
-// CHECK: Index: 2
-// CHECK-NEXT: Name: .rela.text
+// CHECK: Index:
+// CHECK: Name: .rela.text
// CHECK-NEXT: Type: SHT_RELA
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x2E8
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 48
// CHECK-NEXT: Link: 6
// CHECK-NEXT: Info: 1
diff --git a/test/MC/ELF/rename.s b/test/MC/ELF/rename.s
index 5364dde..a7f9638 100644
--- a/test/MC/ELF/rename.s
+++ b/test/MC/ELF/rename.s
@@ -15,33 +15,14 @@ defined3:
.global defined1
-// Section 1 is .text
// CHECK: Section {
-// CHECK: Index: 1
-// CHECK-NEXT: Name: .text
-// CHECK-NEXT: Type: SHT_PROGBITS
-// CHECK-NEXT: Flags [
-// CHECK-NEXT: SHF_ALLOC
-// CHECK-NEXT: SHF_EXECINSTR
-// CHECK-NEXT: ]
-// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x40
-// CHECK-NEXT: Size: 4
-// CHECK-NEXT: Link: 0
-// CHECK-NEXT: Info: 0
-// CHECK-NEXT: AddressAlignment: 4
-// CHECK-NEXT: EntrySize: 0
-// CHECK-NEXT: Relocations [
-// CHECK-NEXT: ]
-// CHECK-NEXT: }
-// CHECK-NEXT: Section {
-// CHECK-NEXT: Index: 2
-// CHECK-NEXT: Name: .rela.text (1)
+// CHECK: Index:
+// CHECK: Name: .rela.text
// CHECK-NEXT: Type: SHT_RELA (0x4)
// CHECK-NEXT: Flags [ (0x0)
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x320
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 24
// CHECK-NEXT: Link: 6
// CHECK-NEXT: Info: 1
@@ -51,15 +32,3 @@ defined3:
// CHECK-NEXT: 0x0 R_X86_64_32 .text 0x0
// CHECK-NEXT: ]
// CHECK-NEXT: }
-
-
-// Symbol 2 is section 1
-// CHECK: Symbol {
-// CHECK: Name: .text (0)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .text (0x1)
-// CHECK-NEXT: }
diff --git a/test/MC/ELF/section-sym2.s b/test/MC/ELF/section-sym2.s
index acdb7d9..f62e3f9 100644
--- a/test/MC/ELF/section-sym2.s
+++ b/test/MC/ELF/section-sym2.s
@@ -6,7 +6,7 @@ mov .rodata, %rsi
.section .rodata
// CHECK:Relocations [
-// CHECK: Section (2) .rela.text {
+// CHECK: Section {{.*}} .rela.text {
// CHECK: Relocation {
// CHECK: Offset: 0x4
// CHECK: Type: R_X86_64_32S (11)
diff --git a/test/MC/ELF/section-unique-err1.s b/test/MC/ELF/section-unique-err1.s
new file mode 100644
index 0000000..3a997d1
--- /dev/null
+++ b/test/MC/ELF/section-unique-err1.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+
+// CHECK: error: expected absolute expression
+
+ .section .text,"ax",@progbits,unique, "abc"
diff --git a/test/MC/ELF/section-unique-err2.s b/test/MC/ELF/section-unique-err2.s
new file mode 100644
index 0000000..7b7cd5f
--- /dev/null
+++ b/test/MC/ELF/section-unique-err2.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+
+// CHECK: error: unique id must be positive
+
+ .section .text,"ax",@progbits,unique, -1
diff --git a/test/MC/ELF/section-unique-err3.s b/test/MC/ELF/section-unique-err3.s
new file mode 100644
index 0000000..bbccd24
--- /dev/null
+++ b/test/MC/ELF/section-unique-err3.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+
+// CHECK: error: unique id is too large
+
+ .section .text,"ax",@progbits,unique, 4294967295
diff --git a/test/MC/ELF/section-unique-err4.s b/test/MC/ELF/section-unique-err4.s
new file mode 100644
index 0000000..3c82682
--- /dev/null
+++ b/test/MC/ELF/section-unique-err4.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+
+// CHECK: error: expected commma
+
+ .section .text,"ax",@progbits,unique 1
diff --git a/test/MC/ELF/section-unique.s b/test/MC/ELF/section-unique.s
index b482af3..3fe7271 100644
--- a/test/MC/ELF/section-unique.s
+++ b/test/MC/ELF/section-unique.s
@@ -1,22 +1,22 @@
// RUN: llvm-mc -triple x86_64-pc-linux-gnu %s -o - | FileCheck %s
// RUN: llvm-mc -triple x86_64-pc-linux-gnu %s -filetype=obj -o - | llvm-readobj -t | FileCheck %s --check-prefix=OBJ
- .section .text,"ax",@progbits,unique
+ .section .text,"ax",@progbits,unique, 4294967293
.globl f
f:
nop
- .section .text,"ax",@progbits,unique
+ .section .text,"ax",@progbits,unique, 4294967294
.globl g
g:
nop
// test that f and g are in different sections.
-// CHECK: .section .text,"ax",@progbits,unique
+// CHECK: .section .text,"ax",@progbits,unique,4294967293
// CHECK: f:
-// CHECK: .section .text,"ax",@progbits,unique
+// CHECK: .section .text,"ax",@progbits,unique,4294967294
// CHECK: g:
// OBJ: Symbol {
diff --git a/test/MC/ELF/symver-msvc.s b/test/MC/ELF/symver-msvc.s
index d6730ca..a726ff3 100644
--- a/test/MC/ELF/symver-msvc.s
+++ b/test/MC/ELF/symver-msvc.s
@@ -11,7 +11,7 @@
// CHECK: Relocations [
-// CHECK-NEXT: Section (2) .rela.text {
+// CHECK-NEXT: Section {{.*}} .rela.text {
// CHECK-NEXT: 0x0 R_X86_64_32 ??_R0?AVexception@std@@@8 0x0
// CHECK-NEXT: 0x4 R_X86_64_32 @??_R0?AVinvalid_argument@std@@@8 0x0
// CHECK-NEXT: 0x8 R_X86_64_32 __imp_??_R0?AVlogic_error@std@@@8 0x0
diff --git a/test/MC/ELF/symver.s b/test/MC/ELF/symver.s
index 6e5825f..80d71fd 100644
--- a/test/MC/ELF/symver.s
+++ b/test/MC/ELF/symver.s
@@ -22,7 +22,7 @@ defined3:
global1:
// CHECK: Relocations [
-// CHECK-NEXT: Section (2) .rela.text {
+// CHECK-NEXT: Section {{.*}} .rela.text {
// CHECK-NEXT: 0x0 R_X86_64_32 .text 0x0
// CHECK-NEXT: 0x4 R_X86_64_32 bar2@zed 0x0
// CHECK-NEXT: 0x8 R_X86_64_32 .text 0x0
diff --git a/test/MC/ELF/tls.s b/test/MC/ELF/tls.s
index 79865cd..940827b 100644
--- a/test/MC/ELF/tls.s
+++ b/test/MC/ELF/tls.s
@@ -19,7 +19,7 @@ foobar:
// CHECK-NEXT: Binding: Local
// CHECK-NEXT: Type: TLS
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .zed (0x5)
+// CHECK-NEXT: Section: .zed
// CHECK-NEXT: }
// CHECK: Symbol {
diff --git a/test/MC/ELF/weak-diff2.s b/test/MC/ELF/weak-diff2.s
new file mode 100644
index 0000000..daf64a4
--- /dev/null
+++ b/test/MC/ELF/weak-diff2.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t 2>&1 | FileCheck %s
+
+// CHECK: error: Cannot represent a subtraction with a weak symbol
+
+.weak f
+f:
+ nop
+g:
+ nop
+.quad g - f
diff --git a/test/MC/ELF/weakref.s b/test/MC/ELF/weakref.s
index 2288264..9485e49 100644
--- a/test/MC/ELF/weakref.s
+++ b/test/MC/ELF/weakref.s
@@ -131,7 +131,7 @@ bar15:
// CHECK-NEXT: Binding: Local
// CHECK-NEXT: Type: Section
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .data (0x3)
+// CHECK-NEXT: Section: .data
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
// CHECK-NEXT: Name: .bss
@@ -140,7 +140,7 @@ bar15:
// CHECK-NEXT: Binding: Local
// CHECK-NEXT: Type: Section
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .bss (0x4)
+// CHECK-NEXT: Section: .bss
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
// CHECK-NEXT: Name: bar10
diff --git a/test/MC/Hexagon/inst_select.ll b/test/MC/Hexagon/inst_select.ll
index 7e88c65..29a2db0 100644
--- a/test/MC/Hexagon/inst_select.ll
+++ b/test/MC/Hexagon/inst_select.ll
@@ -7,4 +7,4 @@ define i32 @foo (i1 %a, i32 %b, i32 %c)
ret i32 %1
}
-; CHECK: 0000 00400085 004201f4 00c09f52
+; CHECK: 0000 00400085 00600174 00608274 00c09f52
diff --git a/test/MC/MachO/ARM/aliased-symbols.s b/test/MC/MachO/ARM/aliased-symbols.s
index e87b81c..cc2e200 100644
--- a/test/MC/MachO/ARM/aliased-symbols.s
+++ b/test/MC/MachO/ARM/aliased-symbols.s
@@ -45,9 +45,9 @@ Ltmp0:
// CHECK-NEXT: Value: 0x[[DEFINED_EARLY]]
// CHECK-NEXT: }
- // defined_late was defined. Just after defined_early.
+ // alias_to_late was an alias to defined_late. But we can resolve it.
// CHECK: Symbol {
-// CHECK-NEXT: Name: defined_late
+// CHECK-NEXT: Name: alias_to_late
// CHECK-NEXT: Type: Section (0xE)
// CHECK-NEXT: Section: __data (0x2)
// CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
@@ -56,9 +56,9 @@ Ltmp0:
// CHECK-NEXT: Value: 0x[[DEFINED_LATE:[0-9A-F]+]]
// CHECK-NEXT: }
- // alias_to_late was an alias to defined_late. But we can resolve it.
+ // defined_late was defined. Just after defined_early.
// CHECK: Symbol {
-// CHECK-NEXT: Name: alias_to_late
+// CHECK-NEXT: Name: defined_late
// CHECK-NEXT: Type: Section (0xE)
// CHECK-NEXT: Section: __data (0x2)
// CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
@@ -72,7 +72,7 @@ Ltmp0:
// CHECK: Symbol {
// CHECK-NEXT: Name: alias_to_local (42)
// CHECK-NEXT: Type: Section (0xE)
-// CHECK-NEXT: Section: (0x0)
+// CHECK-NEXT: Section: __data (0x2)
// CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
// CHECK-NEXT: Flags [ (0x0)
// CHECK-NEXT: ]
diff --git a/test/MC/Mips/elf-tls.s b/test/MC/Mips/elf-tls.s
index d32a699..d50f62c 100644
--- a/test/MC/Mips/elf-tls.s
+++ b/test/MC/Mips/elf-tls.s
@@ -3,7 +3,7 @@
// Check that the appropriate relocations were created.
// CHECK: Relocations [
-// CHECK: Section (2) .rel.text {
+// CHECK: Section {{.*}} .rel.text {
// CHECK: R_MIPS_TLS_LDM
// CHECK: R_MIPS_TLS_DTPREL_HI16
// CHECK: R_MIPS_TLS_DTPREL_LO16
diff --git a/test/MC/Mips/insn-directive.s b/test/MC/Mips/insn-directive.s
new file mode 100644
index 0000000..760a273
--- /dev/null
+++ b/test/MC/Mips/insn-directive.s
@@ -0,0 +1,98 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 | FileCheck %s --check-prefix=ASM
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -filetype=obj -o - | \
+# RUN: llvm-readobj -symbols - | FileCheck %s --check-prefix=OBJ
+
+ .set micromips
+
+ .global f_mm_insn_data
+ .type f_mm_insn_data, @function
+f_mm_insn_data:
+ .insn
+ .word 0x00e73910 # add $7, $7, $7
+
+ .global f_mm_insn_instr
+ .type f_mm_insn_instr, @function
+f_mm_insn_instr:
+ .insn
+ add $7, $7, $7
+
+ .global o_mm_insn_data
+ .type o_mm_insn_data, @object
+o_mm_insn_data:
+ .insn
+ .word 0x00e73910 # add $7, $7, $7
+
+ .global o_mm_insn_instr
+ .type o_mm_insn_instr, @object
+o_mm_insn_instr:
+ .insn
+ add $7, $7, $7
+
+ .set nomicromips
+
+ .global f_normal_insn_data
+ .type f_normal_insn_data, @function
+f_normal_insn_data:
+ .insn
+ .word 0x00e73820 # add $7, $7, $7
+
+ .global f_normal_insn_instr
+ .type f_normal_insn_instr, @function
+f_normal_insn_instr:
+ .insn
+ add $7, $7, $7
+
+ .global o_normal_insn_data
+ .type o_normal_insn_data, @object
+o_normal_insn_data:
+ .insn
+ .word 0x00e73820 # add $7, $7, $7
+
+ .global o_normal_insn_instr
+ .type o_normal_insn_instr, @object
+o_normal_insn_instr:
+ .insn
+ add $7, $7, $7
+
+# Verify that .insn causes the currently saved labels to be cleared by checking
+# that foo doesn't get marked.
+ .set nomicromips
+foo:
+ .insn
+ .word 0x00e73820 # add $7, $7, $7
+
+ .set micromips
+bar:
+ add $7, $7, $7
+
+# ASM: .insn
+
+# OBJ: Symbols [
+# OBJ: Name: foo
+# OBJ: Other: 0
+
+# OBJ: Name: f_mm_insn_data
+# OBJ: Other: 128
+
+# OBJ: Name: f_mm_insn_instr
+# OBJ: Other: 128
+
+# OBJ: Name: f_normal_insn_data
+# OBJ: Other: 0
+
+# OBJ: Name: f_normal_insn_instr
+# OBJ: Other: 0
+
+# OBJ: Name: o_mm_insn_data
+# OBJ: Other: 128
+
+# OBJ: Name: o_mm_insn_instr
+# OBJ: Other: 128
+
+# OBJ: Name: o_normal_insn_data
+# OBJ: Other: 0
+
+# OBJ: Name: o_normal_insn_instr
+# OBJ: Other: 0
+# OBJ: ]
diff --git a/test/MC/Mips/micromips-alias.s b/test/MC/Mips/micromips-alias.s
index c0bf4b3..256b3b6 100644
--- a/test/MC/Mips/micromips-alias.s
+++ b/test/MC/Mips/micromips-alias.s
@@ -14,3 +14,15 @@ f:
nop
.globl bar
bar = f
+
+# CHECK: Name: foo
+# CHECK: Other: 128
+ .type o,@object
+ .set micromips
+o:
+ .insn
+ .word 0x00000000
+ .set nomicromips
+
+ .globl foo
+foo = o
diff --git a/test/MC/Mips/mips-expansions-bad.s b/test/MC/Mips/mips-expansions-bad.s
index 8d85169..d9bac20 100644
--- a/test/MC/Mips/mips-expansions-bad.s
+++ b/test/MC/Mips/mips-expansions-bad.s
@@ -1,6 +1,8 @@
# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r2 2>%t1
# RUN: FileCheck %s < %t1
- .text
- li $5, 0x100000000 # CHECK: :[[@LINE]]:9: error: instruction requires a 64-bit architecture
- dli $5, 1 # CHECK: :[[@LINE]]:9: error: instruction requires a 64-bit architecture
+ .text
+ li $5, 0x100000000
+ # CHECK: :[[@LINE-1]]:3: error: instruction requires a 64-bit architecture
+ dli $5, 1
+ # CHECK: :[[@LINE-1]]:3: error: instruction requires a 64-bit architecture
diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s
index bdc76fb..490b814 100644
--- a/test/MC/Mips/mips-expansions.s
+++ b/test/MC/Mips/mips-expansions.s
@@ -50,6 +50,17 @@
# CHECK: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00]
# CHECK: sw $10, 57920($1) # encoding: [0x40,0xe2,0x2a,0xac]
+# CHECK: lui $8, %hi(symbol) # encoding: [A,A,0x08,0x3c]
+# CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK-NOT: move $8, $8 # encoding: [0x21,0x40,0x00,0x01]
+# CHECK: lw $8, %lo(symbol)($8) # encoding: [A,A,0x08,0x8d]
+# CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK: lui $1, %hi(symbol) # encoding: [A,A,0x01,0x3c]
+# CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK-NOT: move $1, $1 # encoding: [0x21,0x08,0x20,0x00]
+# CHECK: sw $8, %lo(symbol)($1) # encoding: [A,A,0x28,0xac]
+# CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+
# CHECK: lui $1, %hi(symbol)
# CHECK: ldc1 $f0, %lo(symbol)($1)
# CHECK: lui $1, %hi(symbol)
@@ -77,5 +88,8 @@
lw $t2, 655483($a0)
sw $t2, 123456($t1)
+ lw $8, symbol
+ sw $8, symbol
+
ldc1 $f0, symbol
sdc1 $f0, symbol
diff --git a/test/MC/Mips/mips-jump-delay-slots.s b/test/MC/Mips/mips-jump-delay-slots.s
index 49f6c15..c52416f 100644
--- a/test/MC/Mips/mips-jump-delay-slots.s
+++ b/test/MC/Mips/mips-jump-delay-slots.s
@@ -68,9 +68,15 @@
# CHECK: beql $9, $6, 1332
# CHECK-NOT: nop
beql $9,$6,1332
+ # CHECK: beql $9, $zero, 1332
+ # CHECK-NOT: nop
+ beqzl $9,1332
# CHECK: bnel $9, $6, 1332
# CHECK-NOT: nop
bnel $9,$6,1332
+ # CHECK: bnel $9, $zero, 1332
+ # CHECK-NOT: nop
+ bnezl $9,1332
# CHECK: bgezl $6, 1332
# CHECK-NOT: nop
bgezl $6,1332
diff --git a/test/MC/Mips/mips1/valid.s b/test/MC/Mips/mips1/valid.s
index d18f6f5..e2feeac 100644
--- a/test/MC/Mips/mips1/valid.s
+++ b/test/MC/Mips/mips1/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -41,6 +41,11 @@
div.s $f4,$f5,$f15
divu $zero,$25,$15
ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
lb $24,-14515($10)
lbu $8,30195($v1)
lh $11,-8556($s5)
@@ -117,3 +122,5 @@
tlbwr # CHECK: tlbwr # encoding: [0x42,0x00,0x00,0x06]
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
+
+1:
diff --git a/test/MC/Mips/mips2/valid.s b/test/MC/Mips/mips2/valid.s
index 6ee6512..93fdbaf 100644
--- a/test/MC/Mips/mips2/valid.s
+++ b/test/MC/Mips/mips2/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -57,6 +57,11 @@
ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
floor.w.d $f14,$f11
floor.w.s $f8,$f9
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
lb $24,-14515($10)
lbu $8,30195($v1)
ldc1 $f11,16391($s0)
@@ -166,3 +171,5 @@
trunc.w.s $f28,$f30
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
+
+1:
diff --git a/test/MC/Mips/mips3/valid.s b/test/MC/Mips/mips3/valid.s
index 6d55079..954631d 100644
--- a/test/MC/Mips/mips3/valid.s
+++ b/test/MC/Mips/mips3/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips3 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -109,6 +109,11 @@
floor.l.s $f12,$f5
floor.w.d $f14,$f11
floor.w.s $f8,$f9
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
lb $24,-14515($10)
lbu $8,30195($v1)
ld $sp,-28645($s1)
@@ -229,3 +234,5 @@
trunc.w.s $f28,$f30
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
+
+1:
diff --git a/test/MC/Mips/mips32/valid.s b/test/MC/Mips/mips32/valid.s
index ba75d77..3765044 100644
--- a/test/MC/Mips/mips32/valid.s
+++ b/test/MC/Mips/mips32/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -66,6 +66,11 @@
eret
floor.w.d $f14,$f11
floor.w.s $f8,$f9
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
lb $24,-14515($10)
lbu $8,30195($v1)
ldc1 $f11,16391($s0)
@@ -196,3 +201,5 @@
trunc.w.s $f28,$f30
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
+
+1:
diff --git a/test/MC/Mips/mips32r2/valid.s b/test/MC/Mips/mips32r2/valid.s
index 61be290..ee7af3f 100644
--- a/test/MC/Mips/mips32r2/valid.s
+++ b/test/MC/Mips/mips32r2/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -72,6 +72,11 @@
eret
floor.w.d $f14,$f11
floor.w.s $f8,$f9
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x08]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
@@ -235,3 +240,5 @@
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
synci -15842($a2) # CHECK: synci -15842($6) # encoding: [0x04,0xdf,0xc2,0x1e]
+
+1:
diff --git a/test/MC/Mips/mips32r3/valid.s b/test/MC/Mips/mips32r3/valid.s
index ff6589d..0a4e5b1 100644
--- a/test/MC/Mips/mips32r3/valid.s
+++ b/test/MC/Mips/mips32r3/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r3 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -72,6 +72,11 @@
eret
floor.w.d $f14,$f11
floor.w.s $f8,$f9
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x08]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
@@ -235,3 +240,5 @@
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
synci -15842($a2) # CHECK: synci -15842($6) # encoding: [0x04,0xdf,0xc2,0x1e]
+
+1:
diff --git a/test/MC/Mips/mips32r5/valid.s b/test/MC/Mips/mips32r5/valid.s
index 408d0cc..036b908 100644
--- a/test/MC/Mips/mips32r5/valid.s
+++ b/test/MC/Mips/mips32r5/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r5 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -72,6 +72,11 @@
eret
floor.w.d $f14,$f11
floor.w.s $f8,$f9
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x08]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
@@ -235,3 +240,5 @@
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
synci -15842($a2) # CHECK: synci -15842($6) # encoding: [0x04,0xdf,0xc2,0x1e]
+
+1:
diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s
index 7033d4a..2c3a5b2 100644
--- a/test/MC/Mips/mips32r6/valid.s
+++ b/test/MC/Mips/mips32r6/valid.s
@@ -12,7 +12,7 @@
#
# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 2> %t0 | FileCheck %s
# RUN: FileCheck %s -check-prefix=WARNING < %t0
-
+a:
.set noat
# FIXME: Add the instructions carried forward from older ISA's
and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
@@ -146,6 +146,11 @@
rint.d $f2, $f4 # CHECK: rint.d $f2, $f4 # encoding: [0x46,0x20,0x20,0x9a]
class.s $f2, $f4 # CHECK: class.s $f2, $f4 # encoding: [0x46,0x00,0x20,0x9b]
class.d $f2, $f4 # CHECK: class.d $f2, $f4 # encoding: [0x46,0x20,0x20,0x9b]
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x09]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
@@ -176,3 +181,5 @@
tne $6,$17 # CHECK: tne $6, $17 # encoding: [0x00,0xd1,0x00,0x36]
tne $7,$8,885 # CHECK: tne $7, $8, 885 # encoding: [0x00,0xe8,0xdd,0x76]
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
+
+1:
diff --git a/test/MC/Mips/mips4/valid.s b/test/MC/Mips/mips4/valid.s
index 7fcf781..a23990c 100644
--- a/test/MC/Mips/mips4/valid.s
+++ b/test/MC/Mips/mips4/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips4 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -113,6 +113,11 @@
floor.l.s $f12,$f5
floor.w.d $f14,$f11
floor.w.s $f8,$f9
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
lb $24,-14515($10)
lbu $8,30195($v1)
ld $sp,-28645($s1)
@@ -258,3 +263,5 @@
trunc.w.s $f28,$f30
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
+
+1:
diff --git a/test/MC/Mips/mips5/valid.s b/test/MC/Mips/mips5/valid.s
index 4b1282e..094c07f 100644
--- a/test/MC/Mips/mips5/valid.s
+++ b/test/MC/Mips/mips5/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips5 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -113,6 +113,11 @@
floor.l.s $f12,$f5
floor.w.d $f14,$f11
floor.w.s $f8,$f9
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
lb $24,-14515($10)
lbu $8,30195($v1)
ld $sp,-28645($s1)
@@ -260,3 +265,5 @@
trunc.w.s $f28,$f30
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
+
+1:
diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s
index d900ab7..1a65152 100644
--- a/test/MC/Mips/mips64/valid.s
+++ b/test/MC/Mips/mips64/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -118,6 +118,11 @@
floor.l.s $f12,$f5
floor.w.d $f14,$f11
floor.w.s $f8,$f9
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
lb $24,-14515($10)
lbu $8,30195($v1)
ld $sp,-28645($s1)
@@ -277,3 +282,5 @@
trunc.w.s $f28,$f30
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
+
+1:
diff --git a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s
index 7193451..61b1d6d 100644
--- a/test/MC/Mips/mips64r2/valid.s
+++ b/test/MC/Mips/mips64r2/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -131,6 +131,11 @@
floor.l.s $f12,$f5
floor.w.d $f14,$f11
floor.w.s $f8,$f9
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x08]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
@@ -304,3 +309,5 @@
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
wsbh $k1,$9
+
+1:
diff --git a/test/MC/Mips/mips64r3/valid.s b/test/MC/Mips/mips64r3/valid.s
index 3a3f7ad..bfd16d3 100644
--- a/test/MC/Mips/mips64r3/valid.s
+++ b/test/MC/Mips/mips64r3/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r3 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -131,6 +131,11 @@
floor.l.s $f12,$f5
floor.w.d $f14,$f11
floor.w.s $f8,$f9
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x08]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
@@ -304,3 +309,5 @@
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
wsbh $k1,$9
+
+1:
diff --git a/test/MC/Mips/mips64r5/valid.s b/test/MC/Mips/mips64r5/valid.s
index 5ba102d..22c5093 100644
--- a/test/MC/Mips/mips64r5/valid.s
+++ b/test/MC/Mips/mips64r5/valid.s
@@ -1,7 +1,7 @@
# Instructions that are valid
#
# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r5 | FileCheck %s
-
+a:
.set noat
abs.d $f7,$f25 # CHECK: encoding:
abs.s $f9,$f16
@@ -131,6 +131,11 @@
floor.l.s $f12,$f5
floor.w.d $f14,$f11
floor.w.s $f8,$f9
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x08]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
@@ -304,3 +309,5 @@
xor $s2,$a0,$s8
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
wsbh $k1,$9
+
+1:
diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s
index 600cb48..31a0d7f 100644
--- a/test/MC/Mips/mips64r6/valid.s
+++ b/test/MC/Mips/mips64r6/valid.s
@@ -12,7 +12,7 @@
#
# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r6 2> %t0 | FileCheck %s
# RUN: FileCheck %s -check-prefix=WARNING < %t0
-
+a:
.set noat
# FIXME: Add the instructions carried forward from older ISA's
and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
@@ -165,6 +165,11 @@
rint.d $f2, $f4 # CHECK: rint.d $f2, $f4 # encoding: [0x46,0x20,0x20,0x9a]
class.s $f2, $f4 # CHECK: class.s $f2, $f4 # encoding: [0x46,0x00,0x20,0x9b]
class.d $f2, $f4 # CHECK: class.d $f2, $f4 # encoding: [0x46,0x20,0x20,0x9b]
+ j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
+ j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
+ # CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
+ j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x09]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
@@ -199,3 +204,5 @@
tne $6,$17 # CHECK: tne $6, $17 # encoding: [0x00,0xd1,0x00,0x36]
tne $7,$8,885 # CHECK: tne $7, $8, 885 # encoding: [0x00,0xe8,0xdd,0x76]
xor $2, 4 # CHECK: xori $2, $2, 4 # encoding: [0x38,0x42,0x00,0x04]
+
+1:
diff --git a/test/MC/Mips/set-defined-symbol.s b/test/MC/Mips/set-defined-symbol.s
new file mode 100644
index 0000000..54db45d
--- /dev/null
+++ b/test/MC/Mips/set-defined-symbol.s
@@ -0,0 +1,18 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -filetype=obj -o - | \
+# RUN: llvm-objdump -d -r -arch=mips - | FileCheck %s
+
+ .global foo
+ .weak bar
+ .set bar, b
+ .set foo, b
+ .set foo, a
+a:
+ nop
+# CHECK-NOT: a:
+# CHECK: foo:
+
+b:
+ nop
+# CHECK-NOT: b:
+# CHECK-NOT: foo:
+# CHECK: bar:
diff --git a/test/MC/Mips/sort-relocation-table.s b/test/MC/Mips/sort-relocation-table.s
new file mode 100644
index 0000000..590f8fd
--- /dev/null
+++ b/test/MC/Mips/sort-relocation-table.s
@@ -0,0 +1,125 @@
+# RUN: llvm-mc -filetype=obj -arch mipsel %s | llvm-readobj -r | FileCheck %s
+
+# Test the order of records in the relocation table.
+# *HI16 and local *GOT16 relocations should be immediately followed by the
+# corresponding *LO16 relocation against the same symbol.
+#
+# We try to implement the same semantics as gas, i.e. to order the relocation
+# table the same way as gas.
+#
+# GNU as command line:
+# mips-linux-gnu-as -EL sort-relocation-table.s -o sort-relocation-table.o
+#
+# TODO: Add mips16 and micromips tests.
+# Note: offsets are part of expected output, so it's simpler to add new test
+# cases at the bottom of the file.
+
+# CHECK: Relocations [
+# CHECK-NEXT: {
+
+# Put HI before LO.
+addiu $2,$2,%lo(sym1)
+lui $2,%hi(sym1)
+
+# CHECK-NEXT: 0x4 R_MIPS_HI16 sym1
+# CHECK-NEXT: 0x0 R_MIPS_LO16 sym1
+
+# When searching for a matching LO, ignore LOs against a different symbol.
+addiu $2,$2,%lo(sym2)
+lui $2,%hi(sym2)
+addiu $2,$2,%lo(sym2_d)
+
+# CHECK-NEXT: 0xC R_MIPS_HI16 sym2
+# CHECK-NEXT: 0x8 R_MIPS_LO16 sym2
+# CHECK-NEXT: 0x10 R_MIPS_LO16 sym2_d
+
+# Match HI with 2nd LO because it has higher offset (than the 1st LO).
+addiu $2,$2,%lo(sym3)
+addiu $2,$2,%lo(sym3)
+lui $2,%hi(sym3)
+
+# CHECK-NEXT: 0x14 R_MIPS_LO16 sym3
+# CHECK-NEXT: 0x1C R_MIPS_HI16 sym3
+# CHECK-NEXT: 0x18 R_MIPS_LO16 sym3
+
+# HI is already followed by a matching LO, so don't look further, i.e. ignore the
+# "free" LO with higher offset.
+lui $2,%hi(sym4)
+addiu $2,$2,%lo(sym4)
+addiu $2,$2,%lo(sym4)
+
+# CHECK-NEXT: 0x20 R_MIPS_HI16 sym4
+# CHECK-NEXT: 0x24 R_MIPS_LO16 sym4
+# CHECK-NEXT: 0x28 R_MIPS_LO16 sym4
+
+# Match 2nd HI with 2nd LO, since it's the one with highest offset among the
+# "free" ones.
+addiu $2,$2,%lo(sym5)
+addiu $2,$2,%lo(sym5)
+lui $2,%hi(sym5)
+addiu $2,$2,%lo(sym5)
+lui $2,%hi(sym5)
+
+# CHECK-NEXT: 0x2C R_MIPS_LO16 sym5
+# CHECK-NEXT: 0x3C R_MIPS_HI16 sym5
+# CHECK-NEXT: 0x30 R_MIPS_LO16 sym5
+# CHECK-NEXT: 0x34 R_MIPS_HI16 sym5
+# CHECK-NEXT: 0x38 R_MIPS_LO16 sym5
+
+# When more HIs are matched with one LO, sort them in descending order of
+# offset.
+addiu $2,$2,%lo(sym6)
+lui $2,%hi(sym6)
+lui $2,%hi(sym6)
+
+# CHECK-NEXT: 0x48 R_MIPS_HI16 sym6
+# CHECK-NEXT: 0x44 R_MIPS_HI16 sym6
+# CHECK-NEXT: 0x40 R_MIPS_LO16 sym6
+
+# sym7 is a local symbol, so GOT relocation against it needs a matching LO.
+sym7:
+addiu $2,$2,%lo(sym7)
+lui $2,%got(sym7)
+
+# CHECK-NEXT: 0x50 R_MIPS_GOT16 sym7
+# CHECK-NEXT: 0x4C R_MIPS_LO16 sym7
+
+# sym8 is not a local symbol, so don't look for a matching LO for GOT.
+.global sym8
+addiu $2,$2,%lo(sym8)
+lui $2,%got(sym8)
+
+# CHECK-NEXT: 0x54 R_MIPS_LO16 sym8
+# CHECK-NEXT: 0x58 R_MIPS_GOT16 sym8
+
+# A small combination of previous checks.
+symc1:
+addiu $2,$2,%lo(symc1)
+addiu $2,$2,%lo(symc1)
+addiu $2,$2,%lo(symc1)
+lui $2,%hi(symc1)
+lui $2,%got(symc1)
+addiu $2,$2,%lo(symc2)
+lui $2,%hi(symc1)
+lui $2,%hi(symc1)
+lui $2,%got(symc2)
+lui $2,%hi(symc1)
+addiu $2,$2,%lo(symc1)
+addiu $2,$2,%lo(symc2)
+lui $2,%hi(symc1)
+lui $2,%hi(symc1)
+
+# CHECK-NEXT: 0x78 R_MIPS_HI16 symc1
+# CHECK-NEXT: 0x74 R_MIPS_HI16 symc1
+# CHECK-NEXT: 0x6C R_MIPS_GOT16 symc1
+# CHECK-NEXT: 0x68 R_MIPS_HI16 symc1
+# CHECK-NEXT: 0x5C R_MIPS_LO16 symc1
+# CHECK-NEXT: 0x8C R_MIPS_HI16 symc1
+# CHECK-NEXT: 0x60 R_MIPS_LO16 symc1
+# CHECK-NEXT: 0x90 R_MIPS_HI16 symc1
+# CHECK-NEXT: 0x64 R_MIPS_LO16 symc1
+# CHECK-NEXT: 0x70 R_MIPS_LO16 symc2
+# CHECK-NEXT: 0x7C R_MIPS_GOT16 symc2
+# CHECK-NEXT: 0x80 R_MIPS_HI16 symc1
+# CHECK-NEXT: 0x84 R_MIPS_LO16 symc1
+# CHECK-NEXT: 0x88 R_MIPS_LO16 symc2
diff --git a/test/MC/Mips/xgot.s b/test/MC/Mips/xgot.s
index 3084806..3380a85 100644
--- a/test/MC/Mips/xgot.s
+++ b/test/MC/Mips/xgot.s
@@ -9,8 +9,8 @@
// CHECK: 0x14 R_MIPS_GOT_HI16 ext_1
// CHECK: 0x1C R_MIPS_GOT_LO16 ext_1
// CHECK: 0x24 R_MIPS_CALL_HI16 printf
-// CHECK: 0x2C R_MIPS_GOT16 $.str
// CHECK: 0x30 R_MIPS_CALL_LO16 printf
+// CHECK: 0x2C R_MIPS_GOT16 $.str
// CHECK: 0x38 R_MIPS_LO16 $.str
// CHECK: ]
diff --git a/test/MC/PowerPC/ppc-reloc.s b/test/MC/PowerPC/ppc-reloc.s
index e7dd1e2..999d33e 100644
--- a/test/MC/PowerPC/ppc-reloc.s
+++ b/test/MC/PowerPC/ppc-reloc.s
@@ -12,7 +12,7 @@ foo:
.size foo, . - foo
# CHECK: Relocations [
-# CHECK-NEXT: Section (2) .rela.text {
+# CHECK-NEXT: Section {{.*}} .rela.text {
# CHECK-NEXT: 0x0 R_PPC_PLTREL24 printf 0x0
# CHECK-NEXT: 0x4 R_PPC_LOCAL24PC _GLOBAL_OFFSET_TABLE_ 0xFFFFFFFC
# CHECK-NEXT: }
diff --git a/test/MC/PowerPC/ppc64-encoding.s b/test/MC/PowerPC/ppc64-encoding.s
index d2ac669..05fde62 100644
--- a/test/MC/PowerPC/ppc64-encoding.s
+++ b/test/MC/PowerPC/ppc64-encoding.s
@@ -420,12 +420,20 @@
divwu. 2, 3, 4
# FIXME: divwuo 2, 3, 4
# FIXME: divwuo. 2, 3, 4
-# FIXME: divwe 2, 3, 4
-# FIXME: divwe. 2, 3, 4
+# CHECK-BE: divwe 2, 3, 4 # encoding: [0x7c,0x43,0x23,0x56]
+# CHECK-LE: divwe 2, 3, 4 # encoding: [0x56,0x23,0x43,0x7c]
+ divwe 2, 3, 4
+# CHECK-BE: divwe. 2, 3, 4 # encoding: [0x7c,0x43,0x23,0x57]
+# CHECK-LE: divwe. 2, 3, 4 # encoding: [0x57,0x23,0x43,0x7c]
+ divwe. 2, 3, 4
# FIXME: divweo 2, 3, 4
# FIXME: divweo. 2, 3, 4
-# FIXME: divweu 2, 3, 4
-# FIXME: divweu. 2, 3, 4
+# CHECK-BE: divweu 2, 3, 4 # encoding: [0x7c,0x43,0x23,0x16]
+# CHECK-LE: divweu 2, 3, 4 # encoding: [0x16,0x23,0x43,0x7c]
+ divweu 2, 3, 4
+# CHECK-BE: divweu. 2, 3, 4 # encoding: [0x7c,0x43,0x23,0x17]
+# CHECK-LE: divweu. 2, 3, 4 # encoding: [0x17,0x23,0x43,0x7c]
+ divweu. 2, 3, 4
# FIXME: divweuo 2, 3, 4
# FIXME: divweuo. 2, 3, 4
@@ -466,12 +474,20 @@
divdu. 2, 3, 4
# FIXME: divduo 2, 3, 4
# FIXME: divduo. 2, 3, 4
-# FIXME: divde 2, 3, 4
-# FIXME: divde. 2, 3, 4
+# CHECK-BE: divde 2, 3, 4 # encoding: [0x7c,0x43,0x23,0x52]
+# CHECK-LE: divde 2, 3, 4 # encoding: [0x52,0x23,0x43,0x7c]
+ divde 2, 3, 4
+# CHECK-BE: divde. 2, 3, 4 # encoding: [0x7c,0x43,0x23,0x53]
+# CHECK-LE: divde. 2, 3, 4 # encoding: [0x53,0x23,0x43,0x7c]
+ divde. 2, 3, 4
# FIXME: divdeo 2, 3, 4
# FIXME: divdeo. 2, 3, 4
-# FIXME: divdeu 2, 3, 4
-# FIXME: divdeu. 2, 3, 4
+# CHECK-BE: divdeu 2, 3, 4 # encoding: [0x7c,0x43,0x23,0x12]
+# CHECK-LE: divdeu 2, 3, 4 # encoding: [0x12,0x23,0x43,0x7c]
+ divdeu 2, 3, 4
+# CHECK-BE: divdeu. 2, 3, 4 # encoding: [0x7c,0x43,0x23,0x13]
+# CHECK-LE: divdeu. 2, 3, 4 # encoding: [0x13,0x23,0x43,0x7c]
+ divdeu. 2, 3, 4
# FIXME: divdeuo 2, 3, 4
# FIXME: divdeuo. 2, 3, 4
@@ -644,7 +660,9 @@
# CHECK-BE: popcntd 2, 3 # encoding: [0x7c,0x62,0x03,0xf4]
# CHECK-LE: popcntd 2, 3 # encoding: [0xf4,0x03,0x62,0x7c]
popcntd 2, 3
-# FIXME: bpermd 2, 3, 4
+# CHECK-BE: bpermd 2, 3, 4 # encoding: [0x7c,0x62,0x21,0xf8]
+# CHECK-LE: bpermd 2, 3, 4 # encoding: [0xf8,0x21,0x62,0x7c]
+ bpermd 2, 3, 4
# Fixed-point rotate and shift instructions
@@ -703,6 +721,33 @@
# CHECK-LE: rldimi. 2, 3, 4, 5 # encoding: [0x4d,0x21,0x62,0x78]
rldimi. 2, 3, 4, 5
+# Aliases that take bit masks...
+
+# CHECK-BE: rlwinm 0, 0, 30, 31, 31 # encoding: [0x54,0x00,0xf7,0xfe]
+ rlwinm 0, 0, 30, 1
+# CHECK-BE: rlwinm. 0, 0, 30, 31, 31 # encoding: [0x54,0x00,0xf7,0xff]
+ rlwinm. 0, 0, 30, 1
+# CHECK-BE: rlwinm 0, 0, 30, 31, 0 # encoding: [0x54,0x00,0xf7,0xc0]
+ rlwinm 0, 0, 30, 2147483649
+# CHECK-BE: rlwinm. 0, 0, 30, 31, 0 # encoding: [0x54,0x00,0xf7,0xc1]
+ rlwinm. 0, 0, 30, 2147483649
+# CHECK-BE: rlwimi 0, 0, 30, 31, 31 # encoding: [0x50,0x00,0xf7,0xfe]
+ rlwimi 0, 0, 30, 1
+# CHECK-BE: rlwimi. 0, 0, 30, 31, 31 # encoding: [0x50,0x00,0xf7,0xff]
+ rlwimi. 0, 0, 30, 1
+# CHECK-BE: rlwimi 0, 0, 30, 31, 0 # encoding: [0x50,0x00,0xf7,0xc0]
+ rlwimi 0, 0, 30, 2147483649
+# CHECK-BE: rlwimi. 0, 0, 30, 31, 0 # encoding: [0x50,0x00,0xf7,0xc1]
+ rlwimi. 0, 0, 30, 2147483649
+# CHECK-BE: rlwnm 0, 0, 30, 31, 31 # encoding: [0x5c,0x00,0xf7,0xfe]
+ rlwnm 0, 0, 30, 1
+# CHECK-BE: rlwnm. 0, 0, 30, 31, 31 # encoding: [0x5c,0x00,0xf7,0xff]
+ rlwnm. 0, 0, 30, 1
+# CHECK-BE: rlwnm 0, 0, 30, 31, 0 # encoding: [0x5c,0x00,0xf7,0xc0]
+ rlwnm 0, 0, 30, 2147483649
+# CHECK-BE: rlwnm. 0, 0, 30, 31, 0 # encoding: [0x5c,0x00,0xf7,0xc1]
+ rlwnm. 0, 0, 30, 2147483649
+
# CHECK-BE: slw 2, 3, 4 # encoding: [0x7c,0x62,0x20,0x30]
# CHECK-LE: slw 2, 3, 4 # encoding: [0x30,0x20,0x62,0x7c]
slw 2, 3, 4
diff --git a/test/MC/PowerPC/tls-gd-obj.s b/test/MC/PowerPC/tls-gd-obj.s
index 63d47ee..fb4ab8b 100644
--- a/test/MC/PowerPC/tls-gd-obj.s
+++ b/test/MC/PowerPC/tls-gd-obj.s
@@ -47,7 +47,7 @@ a:
// for the call to __tls_get_addr.
//
// CHECK: Relocations [
-// CHECK: Section (2) .rela.text {
+// CHECK: Section {{.*}} .rela.text {
// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_HA a
// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_LO a
// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TLSGD a
diff --git a/test/MC/PowerPC/tls-ie-obj.s b/test/MC/PowerPC/tls-ie-obj.s
index c8c5d91..f7de644 100644
--- a/test/MC/PowerPC/tls-ie-obj.s
+++ b/test/MC/PowerPC/tls-ie-obj.s
@@ -36,7 +36,7 @@ main: # @main
// accessing external variable a.
//
// CHECK: Relocations [
-// CHECK: Section (2) .rela.text {
+// CHECK: Section {{.*}} .rela.text {
// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_HA a
// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_LO_DS a
// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TLS a
diff --git a/test/MC/PowerPC/tls-ld-obj.s b/test/MC/PowerPC/tls-ld-obj.s
index b0c4a7a..1fa371d 100644
--- a/test/MC/PowerPC/tls-ld-obj.s
+++ b/test/MC/PowerPC/tls-ld-obj.s
@@ -50,7 +50,7 @@ a:
// __tls_get_addr.
//
// CHECK: Relocations [
-// CHECK: Section (2) .rela.text {
+// CHECK: Section {{.*}} .rela.text {
// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_HA a
// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_LO a
// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TLSLD a
diff --git a/test/MC/PowerPC/vsx.s b/test/MC/PowerPC/vsx.s
index 4a0053d..c317c37 100644
--- a/test/MC/PowerPC/vsx.s
+++ b/test/MC/PowerPC/vsx.s
@@ -454,3 +454,20 @@
# CHECK-BE: xxpermdi 7, 63, 63, 2 # encoding: [0xf0,0xff,0xfa,0x56]
# CHECK-LE: xxpermdi 7, 63, 63, 2 # encoding: [0x56,0xfa,0xff,0xf0]
xxswapd 7, 63
+
+# Move to/from VSR
+# CHECK-BE: mfvsrd 3, 0 # encoding: [0x7c,0x03,0x00,0x66]
+# CHECK-LE: mfvsrd 3, 0 # encoding: [0x66,0x00,0x03,0x7c]
+ mfvsrd 3, 0
+# CHECK-BE: mfvsrwz 5, 0 # encoding: [0x7c,0x05,0x00,0xe6]
+# CHECK-LE: mfvsrwz 5, 0 # encoding: [0xe6,0x00,0x05,0x7c]
+ mfvsrwz 5, 0
+# CHECK-BE: mtvsrd 0, 3 # encoding: [0x7c,0x03,0x01,0x66]
+# CHECK-LE: mtvsrd 0, 3 # encoding: [0x66,0x01,0x03,0x7c]
+ mtvsrd 0, 3
+# CHECK-BE: mtvsrwa 0, 3 # encoding: [0x7c,0x03,0x01,0xa6]
+# CHECK-LE: mtvsrwa 0, 3 # encoding: [0xa6,0x01,0x03,0x7c]
+ mtvsrwa 0, 3
+# CHECK-BE: mtvsrwz 0, 3 # encoding: [0x7c,0x03,0x01,0xe6]
+# CHECK-LE: mtvsrwz 0, 3 # encoding: [0xe6,0x01,0x03,0x7c]
+ mtvsrwz 0, 3
diff --git a/test/MC/R600/ds-err.s b/test/MC/R600/ds-err.s
new file mode 100644
index 0000000..52c2740
--- /dev/null
+++ b/test/MC/R600/ds-err.s
@@ -0,0 +1,23 @@
+// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
+
+// offset too big
+// CHECK: invalid operand for instruction
+ds_add_u32 v2, v4 offset:1000000000
+
+// offset0 twice
+// CHECK: error: not a valid operand.
+ds_write2_b32 v2, v4, v6 offset0:4 offset0:8
+
+// offset1 twice
+// CHECK: error: not a valid operand.
+ds_write2_b32 v2, v4, v6 offset1:4 offset1:8
+
+// offset0 too big
+// CHECK: invalid operand for instruction
+ds_write2_b32 v2, v4, v6 offset0:1000000000
+
+// offset1 too big
+// CHECK: invalid operand for instruction
+ds_write2_b32 v2, v4, v6 offset1:1000000000
+
diff --git a/test/MC/R600/ds.s b/test/MC/R600/ds.s
new file mode 100644
index 0000000..ad63229
--- /dev/null
+++ b/test/MC/R600/ds.s
@@ -0,0 +1,337 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// Checks for 16-bit Offsets
+//===----------------------------------------------------------------------===//
+
+ds_add_u32 v2, v4 offset:16
+// CHECK: ds_add_u32 v2, v4 offset:16 ; encoding: [0x10,0x00,0x00,0xd8,0x02,0x04,0x00,0x00]
+
+//===----------------------------------------------------------------------===//
+// Checks for 2 8-bit Offsets
+//===----------------------------------------------------------------------===//
+
+ds_write2_b32 v2, v4, v6 offset0:4
+// CHECK: ds_write2_b32 v2, v4, v6 offset0:4 ; encoding: [0x04,0x00,0x38,0xd8,0x02,0x04,0x06,0x00]
+
+ds_write2_b32 v2, v4, v6 offset0:4 offset1:8
+// CHECK: ds_write2_b32 v2, v4, v6 offset0:4 offset1:8 ; encoding: [0x04,0x08,0x38,0xd8,0x02,0x04,0x06,0x00]
+
+ds_write2_b32 v2, v4, v6 offset1:8
+// CHECK: ds_write2_b32 v2, v4, v6 offset1:8 ; encoding: [0x00,0x08,0x38,0xd8,0x02,0x04,0x06,0x00]
+
+ds_read2_b32 v[8:9], v2 offset0:4
+// CHECK: ds_read2_b32 v[8:9], v2 offset0:4 ; encoding: [0x04,0x00,0xdc,0xd8,0x02,0x00,0x00,0x08]
+
+ds_read2_b32 v[8:9], v2 offset0:4 offset1:8
+// CHECK: ds_read2_b32 v[8:9], v2 offset0:4 offset1:8 ; encoding: [0x04,0x08,0xdc,0xd8,0x02,0x00,0x00,0x08]
+
+ds_read2_b32 v[8:9], v2 offset1:8
+// CHECK: ds_read2_b32 v[8:9], v2 offset1:8 ; encoding: [0x00,0x08,0xdc,0xd8,0x02,0x00,0x00,0x08]
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+ds_add_u32 v2, v4
+// CHECK: ds_add_u32 v2, v4 ; encoding: [0x00,0x00,0x00,0xd8,0x02,0x04,0x00,0x00]
+
+ds_sub_u32 v2, v4
+// CHECK: ds_sub_u32 v2, v4 ; encoding: [0x00,0x00,0x04,0xd8,0x02,0x04,0x00,0x00]
+
+ds_rsub_u32 v2, v4
+// CHECK: ds_rsub_u32 v2, v4 ; encoding: [0x00,0x00,0x08,0xd8,0x02,0x04,0x00,0x00]
+
+ds_inc_u32 v2, v4
+// CHECK: ds_inc_u32 v2, v4 ; encoding: [0x00,0x00,0x0c,0xd8,0x02,0x04,0x00,0x00]
+
+ds_dec_u32 v2, v4
+// CHECK: ds_dec_u32 v2, v4 ; encoding: [0x00,0x00,0x10,0xd8,0x02,0x04,0x00,0x00]
+
+ds_min_i32 v2, v4
+// CHECK: ds_min_i32 v2, v4 ; encoding: [0x00,0x00,0x14,0xd8,0x02,0x04,0x00,0x00]
+
+ds_max_i32 v2, v4
+// CHECK: ds_max_i32 v2, v4 ; encoding: [0x00,0x00,0x18,0xd8,0x02,0x04,0x00,0x00]
+
+ds_min_u32 v2, v4
+// CHECK: ds_min_u32 v2, v4 ; encoding: [0x00,0x00,0x1c,0xd8,0x02,0x04,0x00,0x00]
+
+ds_max_u32 v2, v4
+// CHECK: ds_max_u32 v2, v4 ; encoding: [0x00,0x00,0x20,0xd8,0x02,0x04,0x00,0x00]
+
+ds_and_b32 v2, v4
+// CHECK: ds_and_b32 v2, v4 ; encoding: [0x00,0x00,0x24,0xd8,0x02,0x04,0x00,0x00]
+
+ds_or_b32 v2, v4
+// CHECK: ds_or_b32 v2, v4 ; encoding: [0x00,0x00,0x28,0xd8,0x02,0x04,0x00,0x00]
+
+ds_xor_b32 v2, v4
+// CHECK: ds_xor_b32 v2, v4 ; encoding: [0x00,0x00,0x2c,0xd8,0x02,0x04,0x00,0x00]
+
+ds_mskor_b32 v2, v4, v6
+// CHECK: ds_mskor_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x30,0xd8,0x02,0x04,0x06,0x00]
+
+ds_write_b32 v2, v4
+// CHECK: ds_write_b32 v2, v4 ; encoding: [0x00,0x00,0x34,0xd8,0x02,0x04,0x00,0x00]
+
+ds_write2_b32 v2, v4, v6
+// CHECK: ds_write2_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x38,0xd8,0x02,0x04,0x06,0x00]
+
+ds_write2st64_b32 v2, v4, v6
+// CHECK: ds_write2st64_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x3c,0xd8,0x02,0x04,0x06,0x00]
+
+ds_cmpst_b32 v2, v4, v6
+// CHECK: ds_cmpst_b32 v2, v4, v6 ; encoding: [0x00,0x00,0x40,0xd8,0x02,0x04,0x06,0x00]
+
+ds_cmpst_f32 v2, v4, v6
+// CHECK: ds_cmpst_f32 v2, v4, v6 ; encoding: [0x00,0x00,0x44,0xd8,0x02,0x04,0x06,0x00]
+
+ds_min_f32 v2, v4, v6
+// CHECK: ds_min_f32 v2, v4, v6 ; encoding: [0x00,0x00,0x48,0xd8,0x02,0x04,0x06,0x00]
+
+ds_max_f32 v2, v4, v6
+// CHECK: ds_max_f32 v2, v4, v6 ; encoding: [0x00,0x00,0x4c,0xd8,0x02,0x04,0x06,0x00]
+
+ds_gws_init v2 gds
+// CHECK: ds_gws_init v2 gds ; encoding: [0x00,0x00,0x66,0xd8,0x02,0x00,0x00,0x00]
+
+ds_gws_sema_v v2 gds
+// CHECK: ds_gws_sema_v v2 gds ; encoding: [0x00,0x00,0x6a,0xd8,0x02,0x00,0x00,0x00]
+
+ds_gws_sema_br v2 gds
+// CHECK: ds_gws_sema_br v2 gds ; encoding: [0x00,0x00,0x6e,0xd8,0x02,0x00,0x00,0x00]
+
+ds_gws_sema_p v2 gds
+// CHECK: ds_gws_sema_p v2 gds ; encoding: [0x00,0x00,0x72,0xd8,0x02,0x00,0x00,0x00]
+
+ds_gws_barrier v2 gds
+// CHECK: ds_gws_barrier v2 gds ; encoding: [0x00,0x00,0x76,0xd8,0x02,0x00,0x00,0x00]
+
+ds_write_b8 v2, v4
+// CHECK: ds_write_b8 v2, v4 ; encoding: [0x00,0x00,0x78,0xd8,0x02,0x04,0x00,0x00]
+
+ds_write_b16 v2, v4
+// CHECK: ds_write_b16 v2, v4 ; encoding: [0x00,0x00,0x7c,0xd8,0x02,0x04,0x00,0x00]
+
+ds_add_rtn_u32 v8, v2, v4
+// CHECK: ds_add_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x80,0xd8,0x02,0x04,0x00,0x08]
+
+ds_sub_rtn_u32 v8, v2, v4
+// CHECK: ds_sub_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x84,0xd8,0x02,0x04,0x00,0x08]
+
+ds_rsub_rtn_u32 v8, v2, v4
+// CHECK: ds_rsub_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x88,0xd8,0x02,0x04,0x00,0x08]
+
+ds_inc_rtn_u32 v8, v2, v4
+// CHECK: ds_inc_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x8c,0xd8,0x02,0x04,0x00,0x08]
+
+ds_dec_rtn_u32 v8, v2, v4
+// CHECK: ds_dec_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x90,0xd8,0x02,0x04,0x00,0x08]
+
+ds_min_rtn_i32 v8, v2, v4
+// CHECK: ds_min_rtn_i32 v8, v2, v4 ; encoding: [0x00,0x00,0x94,0xd8,0x02,0x04,0x00,0x08]
+
+ds_max_rtn_i32 v8, v2, v4
+// CHECK: ds_max_rtn_i32 v8, v2, v4 ; encoding: [0x00,0x00,0x98,0xd8,0x02,0x04,0x00,0x08]
+
+ds_min_rtn_u32 v8, v2, v4
+// CHECK: ds_min_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0x9c,0xd8,0x02,0x04,0x00,0x08]
+
+ds_max_rtn_u32 v8, v2, v4
+// CHECK: ds_max_rtn_u32 v8, v2, v4 ; encoding: [0x00,0x00,0xa0,0xd8,0x02,0x04,0x00,0x08]
+
+ds_and_rtn_b32 v8, v2, v4
+// CHECK: ds_and_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xa4,0xd8,0x02,0x04,0x00,0x08]
+
+ds_or_rtn_b32 v8, v2, v4
+// CHECK: ds_or_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xa8,0xd8,0x02,0x04,0x00,0x08]
+
+ds_xor_rtn_b32 v8, v2, v4
+// CHECK: ds_xor_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xac,0xd8,0x02,0x04,0x00,0x08]
+
+ds_mskor_rtn_b32 v8, v2, v4, v6
+// CHECK: ds_mskor_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xb0,0xd8,0x02,0x04,0x06,0x08]
+
+ds_wrxchg_rtn_b32 v8, v2, v4
+// CHECK: ds_wrxchg_rtn_b32 v8, v2, v4 ; encoding: [0x00,0x00,0xb4,0xd8,0x02,0x04,0x00,0x08]
+
+ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6
+// CHECK: ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xb8,0xd8,0x02,0x04,0x06,0x08]
+
+ds_wrxchg2st64_rtn_b32 v[8:9], v2, v4, v6
+// CHECK: ds_wrxchg2st64_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xbc,0xd8,0x02,0x04,0x06,0x08]
+
+ds_cmpst_rtn_b32 v8, v2, v4, v6
+// CHECK: ds_cmpst_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc0,0xd8,0x02,0x04,0x06,0x08]
+
+ds_cmpst_rtn_f32 v8, v2, v4, v6
+// CHECK: ds_cmpst_rtn_f32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc4,0xd8,0x02,0x04,0x06,0x08]
+
+ds_min_rtn_f32 v8, v2, v4, v6
+// CHECK: ds_min_rtn_f32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc8,0xd8,0x02,0x04,0x06,0x08]
+
+ds_max_rtn_f32 v8, v2, v4, v6
+// CHECK: ds_max_rtn_f32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xcc,0xd8,0x02,0x04,0x06,0x08]
+
+ds_swizzle_b32 v8, v2
+// CHECK: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
+
+ds_read_b32 v8, v2
+// CHECK: ds_read_b32 v8, v2 ; encoding: [0x00,0x00,0xd8,0xd8,0x02,0x00,0x00,0x08]
+
+ds_read2_b32 v[8:9], v2
+// CHECK: ds_read2_b32 v[8:9], v2 ; encoding: [0x00,0x00,0xdc,0xd8,0x02,0x00,0x00,0x08]
+
+ds_read2st64_b32 v[8:9], v2
+// CHECK: ds_read2st64_b32 v[8:9], v2 ; encoding: [0x00,0x00,0xe0,0xd8,0x02,0x00,0x00,0x08]
+
+ds_read_i8 v8, v2
+// CHECK: ds_read_i8 v8, v2 ; encoding: [0x00,0x00,0xe4,0xd8,0x02,0x00,0x00,0x08]
+
+ds_read_u8 v8, v2
+// CHECK: ds_read_u8 v8, v2 ; encoding: [0x00,0x00,0xe8,0xd8,0x02,0x00,0x00,0x08]
+
+ds_read_i16 v8, v2
+// CHECK: ds_read_i16 v8, v2 ; encoding: [0x00,0x00,0xec,0xd8,0x02,0x00,0x00,0x08]
+
+ds_read_u16 v8, v2
+// CHECK: ds_read_u16 v8, v2 ; encoding: [0x00,0x00,0xf0,0xd8,0x02,0x00,0x00,0x08]
+
+ds_consume v8
+// CHECK: ds_consume v8 ; encoding: [0x00,0x00,0xf4,0xd8,0x00,0x00,0x00,0x08]
+
+ds_append v8
+// CHECK: ds_append v8 ; encoding: [0x00,0x00,0xf8,0xd8,0x00,0x00,0x00,0x08]
+
+ds_ordered_count v8, v2 gds
+// CHECK: ds_ordered_count v8, v2 gds ; encoding: [0x00,0x00,0xfe,0xd8,0x02,0x00,0x00,0x08]
+
+ds_add_u64 v2, v[4:5]
+// CHECK: ds_add_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x00,0xd9,0x02,0x04,0x00,0x00]
+
+ds_sub_u64 v2, v[4:5]
+// CHECK: ds_sub_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x04,0xd9,0x02,0x04,0x00,0x00]
+
+ds_rsub_u64 v2, v[4:5]
+// CHECK: ds_rsub_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x08,0xd9,0x02,0x04,0x00,0x00]
+
+ds_inc_u64 v2, v[4:5]
+// CHECK: ds_inc_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x0c,0xd9,0x02,0x04,0x00,0x00]
+
+ds_dec_u64 v2, v[4:5]
+// CHECK: ds_dec_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x10,0xd9,0x02,0x04,0x00,0x00]
+
+ds_min_i64 v2, v[4:5]
+// CHECK: ds_min_i64 v2, v[4:5] ; encoding: [0x00,0x00,0x14,0xd9,0x02,0x04,0x00,0x00]
+
+ds_max_i64 v2, v[4:5]
+// CHECK: ds_max_i64 v2, v[4:5] ; encoding: [0x00,0x00,0x18,0xd9,0x02,0x04,0x00,0x00]
+
+ds_min_u64 v2, v[4:5]
+// CHECK: ds_min_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x1c,0xd9,0x02,0x04,0x00,0x00]
+
+ds_max_u64 v2, v[4:5]
+// CHECK: ds_max_u64 v2, v[4:5] ; encoding: [0x00,0x00,0x20,0xd9,0x02,0x04,0x00,0x00]
+
+ds_and_b64 v2, v[4:5]
+// CHECK: ds_and_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x24,0xd9,0x02,0x04,0x00,0x00]
+
+ds_or_b64 v2, v[4:5]
+// CHECK: ds_or_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x28,0xd9,0x02,0x04,0x00,0x00]
+
+ds_xor_b64 v2, v[4:5]
+// CHECK: ds_xor_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x2c,0xd9,0x02,0x04,0x00,0x00]
+
+ds_mskor_b64 v2, v[4:5], v[6:7]
+// CHECK: ds_mskor_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x30,0xd9,0x02,0x04,0x06,0x00]
+
+ds_write_b64 v2, v[4:5]
+// CHECK: ds_write_b64 v2, v[4:5] ; encoding: [0x00,0x00,0x34,0xd9,0x02,0x04,0x00,0x00]
+
+ds_write2_b64 v2, v[4:5], v[6:7]
+// CHECK: ds_write2_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x38,0xd9,0x02,0x04,0x06,0x00]
+
+ds_write2st64_b64 v2, v[4:5], v[6:7]
+// CHECK: ds_write2st64_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x3c,0xd9,0x02,0x04,0x06,0x00]
+
+ds_cmpst_b64 v2, v[4:5], v[6:7]
+// CHECK: ds_cmpst_b64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x40,0xd9,0x02,0x04,0x06,0x00]
+
+ds_cmpst_f64 v2, v[4:5], v[6:7]
+// CHECK: ds_cmpst_f64 v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0x44,0xd9,0x02,0x04,0x06,0x00]
+
+ds_min_f64 v2, v[4:5]
+// CHECK: ds_min_f64 v2, v[4:5] ; encoding: [0x00,0x00,0x48,0xd9,0x02,0x04,0x00,0x00]
+
+ds_max_f64 v2, v[4:5]
+// CHECK: ds_max_f64 v2, v[4:5] ; encoding: [0x00,0x00,0x4c,0xd9,0x02,0x04,0x00,0x00]
+
+ds_add_rtn_u64 v[8:9], v2, v[4:5]
+// CHECK: ds_add_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x80,0xd9,0x02,0x04,0x00,0x08]
+
+ds_sub_rtn_u64 v[8:9], v2, v[4:5]
+// CHECK: ds_sub_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x84,0xd9,0x02,0x04,0x00,0x08]
+
+ds_rsub_rtn_u64 v[8:9], v2, v[4:5]
+// CHECK: ds_rsub_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x88,0xd9,0x02,0x04,0x00,0x08]
+
+ds_inc_rtn_u64 v[8:9], v2, v[4:5]
+// CHECK: ds_inc_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x8c,0xd9,0x02,0x04,0x00,0x08]
+
+ds_dec_rtn_u64 v[8:9], v2, v[4:5]
+// CHECK: ds_dec_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x90,0xd9,0x02,0x04,0x00,0x08]
+
+ds_min_rtn_i64 v[8:9], v2, v[4:5]
+// CHECK: ds_min_rtn_i64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x94,0xd9,0x02,0x04,0x00,0x08]
+
+ds_max_rtn_i64 v[8:9], v2, v[4:5]
+// CHECK: ds_max_rtn_i64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x98,0xd9,0x02,0x04,0x00,0x08]
+
+ds_min_rtn_u64 v[8:9], v2, v[4:5]
+// CHECK: ds_min_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0x9c,0xd9,0x02,0x04,0x00,0x08]
+
+ds_max_rtn_u64 v[8:9], v2, v[4:5]
+// CHECK: ds_max_rtn_u64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xa0,0xd9,0x02,0x04,0x00,0x08]
+
+ds_and_rtn_b64 v[8:9], v2, v[4:5]
+// CHECK: ds_and_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xa4,0xd9,0x02,0x04,0x00,0x08]
+
+ds_or_rtn_b64 v[8:9], v2, v[4:5]
+// CHECK: ds_or_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xa8,0xd9,0x02,0x04,0x00,0x08]
+
+ds_xor_rtn_b64 v[8:9], v2, v[4:5]
+// CHECK: ds_xor_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xac,0xd9,0x02,0x04,0x00,0x08]
+
+ds_mskor_rtn_b64 v[8:9], v2, v[4:5], v[6:7]
+// CHECK: ds_mskor_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xb0,0xd9,0x02,0x04,0x06,0x08]
+
+ds_wrxchg_rtn_b64 v[8:9], v2, v[4:5]
+// CHECK: ds_wrxchg_rtn_b64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xb4,0xd9,0x02,0x04,0x00,0x08]
+
+ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7]
+// CHECK: ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xb8,0xd9,0x02,0x04,0x06,0x08]
+
+ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7]
+// CHECK: ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xbc,0xd9,0x02,0x04,0x06,0x08]
+
+ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7]
+// CHECK: ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xc0,0xd9,0x02,0x04,0x06,0x08]
+
+ds_cmpst_rtn_f64 v[8:9], v2, v[4:5], v[6:7]
+// CHECK: ds_cmpst_rtn_f64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xc4,0xd9,0x02,0x04,0x06,0x08]
+
+ds_min_rtn_f64 v[8:9], v2, v[4:5]
+// CHECK: ds_min_rtn_f64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xc8,0xd9,0x02,0x04,0x00,0x08]
+
+ds_max_rtn_f64 v[8:9], v2, v[4:5]
+// CHECK: ds_max_rtn_f64 v[8:9], v2, v[4:5] ; encoding: [0x00,0x00,0xcc,0xd9,0x02,0x04,0x00,0x08]
+
+ds_read_b64 v[8:9], v2
+// CHECK: ds_read_b64 v[8:9], v2 ; encoding: [0x00,0x00,0xd8,0xd9,0x02,0x00,0x00,0x08]
+
+ds_read2_b64 v[8:11], v2
+// CHECK: ds_read2_b64 v[8:11], v2 ; encoding: [0x00,0x00,0xdc,0xd9,0x02,0x00,0x00,0x08]
+
+ds_read2st64_b64 v[8:11], v2
+// CHECK: ds_read2st64_b64 v[8:11], v2 ; encoding: [0x00,0x00,0xe0,0xd9,0x02,0x00,0x00,0x08]
diff --git a/test/MC/R600/mubuf.s b/test/MC/R600/mubuf.s
new file mode 100644
index 0000000..78d365a
--- /dev/null
+++ b/test/MC/R600/mubuf.s
@@ -0,0 +1,352 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// Test for different operand combinations
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// load - immediate offset only
+//===----------------------------------------------------------------------===//
+
+buffer_load_dword v1, s[4:7], s1
+// CHECK: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_dword v1, s[4:7], s1 offset:4
+// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_dword v1, s[4:7], s1 offset:4 glc
+// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_dword v1, s[4:7], s1 offset:4 slc
+// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x41,0x01]
+
+buffer_load_dword v1, s[4:7], s1 offset:4 tfe
+// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x81,0x01]
+
+buffer_load_dword v1, s[4:7], s1 tfe glc
+// CHECK: buffer_load_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x30,0xe0,0x00,0x01,0x81,0x01]
+
+buffer_load_dword v1, s[4:7], s1 offset:4 glc tfe slc
+// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
+
+buffer_load_dword v1, s[4:7], s1 glc tfe slc offset:4
+// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
+
+//===----------------------------------------------------------------------===//
+// load - vgpr offset
+//===----------------------------------------------------------------------===//
+
+buffer_load_dword v1, v2, s[4:7], s1 offen
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x30,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 offen offset:4
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 slc
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 slc ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x41,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 tfe
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 tfe ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 offen tfe glc
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen glc tfe ; encoding: [0x00,0x50,0x30,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc tfe slc
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 offen glc tfe slc offset:4
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
+
+//===----------------------------------------------------------------------===//
+// load - vgpr index
+//===----------------------------------------------------------------------===//
+
+buffer_load_dword v1, v2, s[4:7], s1 idxen
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen ; encoding: [0x00,0x20,0x30,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 slc
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 slc ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x41,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 tfe
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 tfe ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 idxen tfe glc
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen glc tfe ; encoding: [0x00,0x60,0x30,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc tfe slc
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
+
+buffer_load_dword v1, v2, s[4:7], s1 idxen glc tfe slc offset:4
+// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
+
+//===----------------------------------------------------------------------===//
+// load - vgpr index and offset
+//===----------------------------------------------------------------------===//
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen ; encoding: [0x00,0x30,0x30,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x41,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen tfe glc
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe ; encoding: [0x00,0x70,0x30,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc tfe slc
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe slc offset:4
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
+
+//===----------------------------------------------------------------------===//
+// load - addr64
+//===----------------------------------------------------------------------===//
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 addr64
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x30,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x41,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 tfe glc
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x30,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc tfe slc
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
+
+buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe slc offset:4
+// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
+
+//===----------------------------------------------------------------------===//
+// store - immediate offset only
+//===----------------------------------------------------------------------===//
+
+buffer_store_dword v1, s[4:7], s1
+// CHECK: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_dword v1, s[4:7], s1 offset:4
+// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_dword v1, s[4:7], s1 offset:4 glc
+// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_dword v1, s[4:7], s1 offset:4 slc
+// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x41,0x01]
+
+buffer_store_dword v1, s[4:7], s1 offset:4 tfe
+// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01]
+
+buffer_store_dword v1, s[4:7], s1 tfe glc
+// CHECK: buffer_store_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01]
+
+buffer_store_dword v1, s[4:7], s1 offset:4 glc tfe slc
+// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
+
+buffer_store_dword v1, s[4:7], s1 glc tfe slc offset:4
+// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
+
+//===----------------------------------------------------------------------===//
+// store - vgpr offset
+//===----------------------------------------------------------------------===//
+
+buffer_store_dword v1, v2, s[4:7], s1 offen
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 offen offset:4
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 slc
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 slc ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x41,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 tfe
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 tfe ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 offen tfe glc
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen glc tfe ; encoding: [0x00,0x50,0x70,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc tfe slc
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 offen glc tfe slc offset:4
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
+
+//===----------------------------------------------------------------------===//
+// store - vgpr index
+//===----------------------------------------------------------------------===//
+
+buffer_store_dword v1, v2, s[4:7], s1 idxen
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen ; encoding: [0x00,0x20,0x70,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 slc
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 slc ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x41,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 tfe
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 tfe ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 idxen tfe glc
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen glc tfe ; encoding: [0x00,0x60,0x70,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc tfe slc
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
+
+buffer_store_dword v1, v2, s[4:7], s1 idxen glc tfe slc offset:4
+// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
+
+//===----------------------------------------------------------------------===//
+// store - vgpr index and offset
+//===----------------------------------------------------------------------===//
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen ; encoding: [0x00,0x30,0x70,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x41,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen tfe glc
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe ; encoding: [0x00,0x70,0x70,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc tfe slc
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe slc offset:4
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
+
+//===----------------------------------------------------------------------===//
+// store - addr64
+//===----------------------------------------------------------------------===//
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 addr64
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x70,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0x01,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x41,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 tfe glc
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x70,0xe0,0x02,0x01,0x81,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc tfe slc
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
+
+buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe slc offset:4
+// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+buffer_load_format_x v1, s[4:7], s1
+// CHECK: buffer_load_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_format_xy v[1:2], s[4:7], s1
+// CHECK: buffer_load_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_format_xyz v[1:3], s[4:7], s1
+// CHECK: buffer_load_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_format_xyzw v[1:4], s[4:7], s1
+// CHECK: buffer_load_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_format_x v1, s[4:7], s1
+// CHECK: buffer_store_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_format_xy v[1:2], s[4:7], s1
+// CHECK: buffer_store_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_format_xyz v[1:3], s[4:7], s1
+// CHECK: buffer_store_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_format_xyzw v[1:4], s[4:7], s1
+// CHECK: buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_ubyte v1, s[4:7], s1
+// CHECK: buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_sbyte v1, s[4:7], s1
+// CHECK: buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_ushort v1, s[4:7], s1
+// CHECK: buffer_load_ushort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_sshort v1, s[4:7], s1
+// CHECK: buffer_load_sshort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_dword v1, s[4:7], s1
+// CHECK: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_dwordx2 v[1:2], s[4:7], s1
+// CHECK: buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_dwordx4 v[1:4], s[4:7], s1
+// CHECK: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_byte v1, s[4:7], s1
+// CHECK: buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_short v1, s[4:7], s1
+// CHECK: buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_dword v1, s[4:7], s1
+// CHECK: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_dwordx2 v[1:2], s[4:7], s1
+// CHECK: buffer_store_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_dwordx4 v[1:4], s[4:7], s1
+// CHECK: buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01]
+
+// TODO: Atomics
diff --git a/test/MC/R600/smrd.s b/test/MC/R600/smrd.s
new file mode 100644
index 0000000..b67abf7
--- /dev/null
+++ b/test/MC/R600/smrd.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+
+s_load_dword s1, s[2:3], 1
+// CHECK: s_load_dword s1, s[2:3], 0x1 ; encoding: [0x01,0x83,0x00,0xc0]
+
+s_load_dword s1, s[2:3], s4
+// CHECK: s_load_dword s1, s[2:3], s4 ; encoding: [0x04,0x82,0x00,0xc0]
+
+s_load_dwordx2 s[2:3], s[2:3], 1
+// CHECK: s_load_dwordx2 s[2:3], s[2:3], 0x1 ; encoding: [0x01,0x03,0x41,0xc0]
+
+s_load_dwordx2 s[2:3], s[2:3], s4
+// CHECK: s_load_dwordx2 s[2:3], s[2:3], s4 ; encoding: [0x04,0x02,0x41,0xc0]
+
+s_load_dwordx4 s[4:7], s[2:3], 1
+// CHECK: s_load_dwordx4 s[4:7], s[2:3], 0x1 ; encoding: [0x01,0x03,0x82,0xc0]
+
+s_load_dwordx4 s[4:7], s[2:3], s4
+// CHECK: s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x04,0x02,0x82,0xc0]
+
+s_load_dwordx8 s[8:15], s[2:3], 1
+// CHECK: s_load_dwordx8 s[8:15], s[2:3], 0x1 ; encoding: [0x01,0x03,0xc4,0xc0]
+
+s_load_dwordx8 s[8:15], s[2:3], s4
+// CHECK: s_load_dwordx8 s[8:15], s[2:3], s4 ; encoding: [0x04,0x02,0xc4,0xc0]
+
+s_load_dwordx16 s[16:31], s[2:3], 1
+// CHECK: s_load_dwordx16 s[16:31], s[2:3], 0x1 ; encoding: [0x01,0x03,0x08,0xc1]
+
+s_load_dwordx16 s[16:31], s[2:3], s4
+// CHECK: s_load_dwordx16 s[16:31], s[2:3], s4 ; encoding: [0x04,0x02,0x08,0xc1]
diff --git a/test/MC/R600/sop1-err.s b/test/MC/R600/sop1-err.s
new file mode 100644
index 0000000..f892356
--- /dev/null
+++ b/test/MC/R600/sop1-err.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
+
+s_mov_b32 v1, s2
+// CHECK: error: invalid operand for instruction
+
+s_mov_b32 s1, v0
+// CHECK: error: invalid operand for instruction
+
+s_mov_b32 s[1:2], s0
+// CHECK: error: invalid operand for instruction
+
+s_mov_b32 s0, s[1:2]
+// CHECK: error: invalid operand for instruction
+
+s_mov_b32 s220, s0
+// CHECK: error: invalid operand for instruction
+
+s_mov_b32 s0, s220
+// CHECK: error: invalid operand for instruction
+
+s_mov_b64 s1, s[0:1]
+// CHECK: error: invalid operand for instruction
+
+s_mov_b64 s[0:1], s1
+// CHECK: error: invalid operand for instruction
+
+// Immediate greater than 32-bits
+s_mov_b32 s1, 0xfffffffff
+// CHECK: error: invalid immediate: only 32-bit values are legal
+
+// Immediate greater than 32-bits
+s_mov_b64 s[0:1], 0xfffffffff
+// CHECK: error: invalid immediate: only 32-bit values are legal
+
+// Out of range register
+s_mov_b32 s
diff --git a/test/MC/R600/sop1.s b/test/MC/R600/sop1.s
new file mode 100644
index 0000000..92ca73f
--- /dev/null
+++ b/test/MC/R600/sop1.s
@@ -0,0 +1,177 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+
+s_mov_b32 s1, s2
+// CHECK: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe]
+
+s_mov_b32 s1, 1
+// CHECK: s_mov_b32 s1, 1 ; encoding: [0x81,0x03,0x81,0xbe]
+
+s_mov_b32 s1, 100
+// CHECK: s_mov_b32 s1, 0x64 ; encoding: [0xff,0x03,0x81,0xbe,0x64,0x00,0x00,0x00]
+
+s_mov_b64 s[2:3], s[4:5]
+// CHECK: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe]
+
+s_mov_b64 s[2:3], 0xffffffffffffffff
+// CHECK: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x04,0x82,0xbe]
+
+s_cmov_b32 s1, 200
+// CHECK: s_cmov_b32 s1, 0xc8 ; encoding: [0xff,0x05,0x81,0xbe,0xc8,0x00,0x00,0x00]
+
+s_cmov_b32 s1, 1.0
+// CHECK: s_cmov_b32 s1, 1.0 ; encoding: [0xf2,0x05,0x81,0xbe]
+
+//s_cmov_b64 s[2:3], 1.0
+//CHECK-FIXME: s_cmov_b64 s[2:3], 1.0 ; encoding: [0xf2,0x05,0x82,0xb3]
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+s_mov_b32 s1, s2
+// CHECK: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe]
+
+s_mov_b64 s[2:3], s[4:5]
+// CHECK: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe]
+
+s_cmov_b32 s1, s2
+// CHECK: s_cmov_b32 s1, s2 ; encoding: [0x02,0x05,0x81,0xbe]
+
+s_cmov_b64 s[2:3], s[4:5]
+// CHECK: s_cmov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x06,0x82,0xbe]
+
+s_not_b32 s1, s2
+// CHECK: s_not_b32 s1, s2 ; encoding: [0x02,0x07,0x81,0xbe]
+
+s_not_b64 s[2:3], s[4:5]
+// CHECK: s_not_b64 s[2:3], s[4:5] ; encoding: [0x04,0x08,0x82,0xbe]
+
+s_wqm_b32 s1, s2
+// CHECK: s_wqm_b32 s1, s2 ; encoding: [0x02,0x09,0x81,0xbe]
+
+s_wqm_b64 s[2:3], s[4:5]
+// CHECK: s_wqm_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0a,0x82,0xbe]
+
+s_brev_b32 s1, s2
+// CHECK: s_brev_b32 s1, s2 ; encoding: [0x02,0x0b,0x81,0xbe]
+
+s_brev_b64 s[2:3], s[4:5]
+// CHECK: s_brev_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0c,0x82,0xbe]
+
+s_bcnt0_i32_b32 s1, s2
+// CHECK: s_bcnt0_i32_b32 s1, s2 ; encoding: [0x02,0x0d,0x81,0xbe]
+
+s_bcnt0_i32_b64 s1, s[2:3]
+// CHECK: s_bcnt0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0e,0x81,0xbe]
+
+s_bcnt1_i32_b32 s1, s2
+// CHECK: s_bcnt1_i32_b32 s1, s2 ; encoding: [0x02,0x0f,0x81,0xbe]
+
+s_bcnt1_i32_b64 s1, s[2:3]
+// CHECK: s_bcnt1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x10,0x81,0xbe]
+
+s_ff0_i32_b32 s1, s2
+// CHECK: s_ff0_i32_b32 s1, s2 ; encoding: [0x02,0x11,0x81,0xbe]
+
+s_ff0_i32_b64 s1, s[2:3]
+// CHECK: s_ff0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x12,0x81,0xbe]
+
+s_ff1_i32_b32 s1, s2
+// CHECK: s_ff1_i32_b32 s1, s2 ; encoding: [0x02,0x13,0x81,0xbe]
+
+s_ff1_i32_b64 s1, s[2:3]
+// CHECK: s_ff1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x14,0x81,0xbe]
+
+s_flbit_i32_b32 s1, s2
+// CHECK: s_flbit_i32_b32 s1, s2 ; encoding: [0x02,0x15,0x81,0xbe]
+
+s_flbit_i32_b64 s1, s[2:3]
+// CHECK: s_flbit_i32_b64 s1, s[2:3] ; encoding: [0x02,0x16,0x81,0xbe]
+
+s_flbit_i32 s1, s2
+// CHECK: s_flbit_i32 s1, s2 ; encoding: [0x02,0x17,0x81,0xbe]
+
+s_flbit_i32_i64 s1, s[2:3]
+// CHECK: s_flbit_i32_i64 s1, s[2:3] ; encoding: [0x02,0x18,0x81,0xbe]
+
+s_sext_i32_i8 s1, s2
+// CHECK: s_sext_i32_i8 s1, s2 ; encoding: [0x02,0x19,0x81,0xbe]
+
+s_sext_i32_i16 s1, s2
+// CHECK: s_sext_i32_i16 s1, s2 ; encoding: [0x02,0x1a,0x81,0xbe]
+
+s_bitset0_b32 s1, s2
+// CHECK: s_bitset0_b32 s1, s2 ; encoding: [0x02,0x1b,0x81,0xbe]
+
+s_bitset0_b64 s[2:3], s[4:5]
+// CHECK: s_bitset0_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1c,0x82,0xbe]
+
+s_bitset1_b32 s1, s2
+// CHECK: s_bitset1_b32 s1, s2 ; encoding: [0x02,0x1d,0x81,0xbe]
+
+s_bitset1_b64 s[2:3], s[4:5]
+// CHECK: s_bitset1_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1e,0x82,0xbe]
+
+s_getpc_b64 s[2:3]
+// CHECK: s_getpc_b64 s[2:3] ; encoding: [0x00,0x1f,0x82,0xbe]
+
+s_setpc_b64 s[2:3], s[4:5]
+// CHECK: s_setpc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x20,0x82,0xbe]
+
+s_swappc_b64 s[2:3], s[4:5]
+// CHECK: s_swappc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x21,0x82,0xbe]
+
+s_rfe_b64 s[2:3], s[4:5]
+// CHECK: s_rfe_b64 s[2:3], s[4:5] ; encoding: [0x04,0x22,0x82,0xbe]
+
+s_and_saveexec_b64 s[2:3], s[4:5]
+// CHECK: s_and_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x24,0x82,0xbe]
+
+s_or_saveexec_b64 s[2:3], s[4:5]
+// CHECK: s_or_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x25,0x82,0xbe]
+
+s_xor_saveexec_b64 s[2:3], s[4:5]
+// CHECK: s_xor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x26,0x82,0xbe]
+
+s_andn2_saveexec_b64 s[2:3], s[4:5]
+// CHECK: s_andn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x27,0x82,0xbe]
+
+s_orn2_saveexec_b64 s[2:3], s[4:5]
+// CHECK: s_orn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x28,0x82,0xbe]
+
+s_nand_saveexec_b64 s[2:3], s[4:5]
+// CHECK: s_nand_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x29,0x82,0xbe]
+
+s_nor_saveexec_b64 s[2:3], s[4:5]
+// CHECK: s_nor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2a,0x82,0xbe]
+
+s_xnor_saveexec_b64 s[2:3], s[4:5]
+// CHECK: s_xnor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2b,0x82,0xbe]
+
+s_quadmask_b32 s1, s2
+// CHECK: s_quadmask_b32 s1, s2 ; encoding: [0x02,0x2c,0x81,0xbe]
+
+s_quadmask_b64 s[2:3], s[4:5]
+// CHECK: s_quadmask_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2d,0x82,0xbe]
+
+s_movrels_b32 s1, s2
+// CHECK: s_movrels_b32 s1, s2 ; encoding: [0x02,0x2e,0x81,0xbe]
+
+s_movrels_b64 s[2:3], s[4:5]
+// CHECK: s_movrels_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2f,0x82,0xbe]
+
+s_movreld_b32 s1, s2
+// CHECK: s_movreld_b32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe]
+
+s_movreld_b64 s[2:3], s[4:5]
+// CHECK: s_movreld_b64 s[2:3], s[4:5] ; encoding: [0x04,0x31,0x82,0xbe]
+
+s_cbranch_join s[4:5]
+// CHECK: s_cbranch_join s[4:5] ; encoding: [0x04,0x32,0x80,0xbe]
+
+s_abs_i32 s1, s2
+// CHECK: s_abs_i32 s1, s2 ; encoding: [0x02,0x34,0x81,0xbe]
+
+s_mov_fed_b32 s1, s2
+// CHECK: s_mov_fed_b32 s1, s2 ; encoding: [0x02,0x35,0x81,0xbe]
diff --git a/test/MC/R600/sop2.s b/test/MC/R600/sop2.s
new file mode 100644
index 0000000..9a7a1c0
--- /dev/null
+++ b/test/MC/R600/sop2.s
@@ -0,0 +1,131 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+
+// CHECK: s_add_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x80]
+s_add_u32 s1, s2, s3
+
+// CHECK: s_sub_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x80]
+s_sub_u32 s1, s2, s3
+
+// CHECK: s_add_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x81]
+s_add_i32 s1, s2, s3
+
+// CHECK: s_sub_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x81]
+s_sub_i32 s1, s2, s3
+
+// CHECK: s_addc_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x82]
+s_addc_u32 s1, s2, s3
+
+// CHECK: s_subb_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x82]
+s_subb_u32 s1, s2, s3
+
+// CHECK: s_min_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x83]
+s_min_i32 s1, s2, s3
+
+// CHECK: s_min_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x83]
+s_min_u32 s1, s2, s3
+
+// CHECK: s_max_i32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x84]
+s_max_i32 s1, s2, s3
+
+// CHECK: s_max_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x81,0x84]
+s_max_u32 s1, s2, s3
+
+// CHECK: s_cselect_b32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x85]
+s_cselect_b32 s1, s2, s3
+
+// CHECK: s_cselect_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x85]
+s_cselect_b64 s[2:3], s[4:5], s[6:7]
+
+// CHECK: s_and_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x87]
+s_and_b32 s2, s4, s6
+
+// CHECK: s_and_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x87]
+s_and_b64 s[2:3], s[4:5], s[6:7]
+
+// CHECK: s_or_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x88]
+s_or_b32 s2, s4, s6
+
+// CHECK: s_or_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x88]
+s_or_b64 s[2:3], s[4:5], s[6:7]
+
+// CHECK: s_xor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x89]
+s_xor_b32 s2, s4, s6
+
+// CHECK: s_xor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x89]
+s_xor_b64 s[2:3], s[4:5], s[6:7]
+
+// CHECK: s_andn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8a]
+s_andn2_b32 s2, s4, s6
+
+// CHECK: s_andn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8a]
+s_andn2_b64 s[2:3], s[4:5], s[6:7]
+
+// CHECK: s_orn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8b]
+s_orn2_b32 s2, s4, s6
+
+// CHECK: s_orn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8b]
+s_orn2_b64 s[2:3], s[4:5], s[6:7]
+
+// CHECK: s_nand_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8c]
+s_nand_b32 s2, s4, s6
+
+// CHECK: s_nand_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8c]
+s_nand_b64 s[2:3], s[4:5], s[6:7]
+
+// CHECK: s_nor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8d]
+s_nor_b32 s2, s4, s6
+
+// CHECK: s_nor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8d]
+s_nor_b64 s[2:3], s[4:5], s[6:7]
+
+// CHECK: s_xnor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8e]
+s_xnor_b32 s2, s4, s6
+
+// CHECK: s_xnor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8e]
+s_xnor_b64 s[2:3], s[4:5], s[6:7]
+
+// CHECK: s_lshl_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8f]
+s_lshl_b32 s2, s4, s6
+
+// CHECK: s_lshl_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x8f]
+s_lshl_b64 s[2:3], s[4:5], s6
+
+// CHECK: s_lshr_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x90]
+s_lshr_b32 s2, s4, s6
+
+// CHECK: s_lshr_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x90]
+s_lshr_b64 s[2:3], s[4:5], s6
+
+// CHECK: s_ashr_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x91]
+s_ashr_i32 s2, s4, s6
+
+// CHECK: s_ashr_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x91]
+s_ashr_i64 s[2:3], s[4:5], s6
+
+// CHECK: s_bfm_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x92]
+s_bfm_b32 s2, s4, s6
+
+// CHECK: s_bfm_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x92]
+s_bfm_b64 s[2:3], s[4:5], s[6:7]
+
+// CHECK: s_mul_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x93]
+s_mul_i32 s2, s4, s6
+
+// CHECK: s_bfe_u32 s2, s4, s6 ; encoding: [0x04,0x06,0x82,0x93]
+s_bfe_u32 s2, s4, s6
+
+// CHECK: s_bfe_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x94]
+s_bfe_i32 s2, s4, s6
+
+// CHECK: s_bfe_u64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x94]
+s_bfe_u64 s[2:3], s[4:5], s[6:7]
+
+// CHECK: s_bfe_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x02,0x95]
+s_bfe_i64 s[2:3], s[4:5], s6
+
+// CHECK: s_cbranch_g_fork s[4:5], s[6:7] ; encoding: [0x04,0x06,0x80,0x95]
+s_cbranch_g_fork s[4:5], s[6:7]
+
+// CHECK: s_absdiff_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x96]
+s_absdiff_i32 s2, s4, s6
diff --git a/test/MC/R600/sopc.s b/test/MC/R600/sopc.s
new file mode 100644
index 0000000..0899c1a
--- /dev/null
+++ b/test/MC/R600/sopc.s
@@ -0,0 +1,9 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+s_cmp_eq_i32 s1, s2
+// CHECK: s_cmp_eq_i32 s1, s2 ; encoding: [0x01,0x02,0x00,0xbf]
diff --git a/test/MC/R600/sopk.s b/test/MC/R600/sopk.s
new file mode 100644
index 0000000..6c27aac
--- /dev/null
+++ b/test/MC/R600/sopk.s
@@ -0,0 +1,66 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+s_movk_i32 s2, 0x6
+// CHECK: s_movk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb0]
+
+s_cmovk_i32 s2, 0x6
+// CHECK: s_cmovk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb1]
+
+s_cmpk_eq_i32 s2, 0x6
+// CHECK: s_cmpk_eq_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb1]
+
+s_cmpk_lg_i32 s2, 0x6
+// CHECK: s_cmpk_lg_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb2]
+
+s_cmpk_gt_i32 s2, 0x6
+// CHECK: s_cmpk_gt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb2]
+
+s_cmpk_ge_i32 s2, 0x6
+// CHECK: s_cmpk_ge_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb3]
+
+s_cmpk_lt_i32 s2, 0x6
+// CHECK: s_cmpk_lt_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb3]
+
+s_cmpk_le_i32 s2, 0x6
+// CHECK: s_cmpk_le_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb4]
+
+s_cmpk_eq_u32 s2, 0x6
+// CHECK: s_cmpk_eq_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb4]
+
+s_cmpk_lg_u32 s2, 0x6
+// CHECK: s_cmpk_lg_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb5]
+
+s_cmpk_gt_u32 s2, 0x6
+// CHECK: s_cmpk_gt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb5]
+
+s_cmpk_ge_u32 s2, 0x6
+// CHECK: s_cmpk_ge_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb6]
+
+s_cmpk_lt_u32 s2, 0x6
+// CHECK: s_cmpk_lt_u32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb6]
+
+s_cmpk_le_u32 s2, 0x6
+// CHECK: s_cmpk_le_u32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb7]
+
+s_addk_i32 s2, 0x6
+// CHECK: s_addk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb7]
+
+s_mulk_i32 s2, 0x6
+// CHECK: s_mulk_i32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb8]
+
+s_cbranch_i_fork s[2:3], 0x6
+// CHECK: s_cbranch_i_fork s[2:3], 0x6 ; encoding: [0x06,0x00,0x82,0xb8]
+
+s_getreg_b32 s2, 0x6
+// CHECK: s_getreg_b32 s2, 0x6 ; encoding: [0x06,0x00,0x02,0xb9]
+
+s_setreg_b32 s2, 0x6
+// CHECK: s_setreg_b32 s2, 0x6 ; encoding: [0x06,0x00,0x82,0xb9]
+
+s_setreg_imm32_b32 0xff, 0x6
+// CHECK: s_setreg_imm32_b32 0xff, 0x6 ; encoding: [0x06,0x00,0x80,0xba,0xff,0x00,0x00,0x00]
diff --git a/test/MC/R600/sopp.s b/test/MC/R600/sopp.s
index 0f186b1..b072c16 100644
--- a/test/MC/R600/sopp.s
+++ b/test/MC/R600/sopp.s
@@ -1,4 +1,16 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// Edge Cases
+//===----------------------------------------------------------------------===//
+
+s_nop 0 // CHECK: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
+s_nop 0xffff // CHECK: s_nop 0xffff ; encoding: [0xff,0xff,0x80,0xbf]
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
s_nop 1 // CHECK: s_nop 1 ; encoding: [0x01,0x00,0x80,0xbf]
s_endpgm // CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
diff --git a/test/MC/R600/vop1.s b/test/MC/R600/vop1.s
new file mode 100644
index 0000000..9c9a6b2
--- /dev/null
+++ b/test/MC/R600/vop1.s
@@ -0,0 +1,182 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+
+// CHECK: v_nop ; encoding: [0x00,0x00,0x00,0x7e]
+v_nop
+
+// CHECK: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e]
+v_mov_b32 v1, v2
+
+// CHECK: v_readfirstlane_b32 s1, v2 ; encoding: [0x02,0x05,0x02,0x7e]
+v_readfirstlane_b32 s1, v2
+
+// CHECK: v_cvt_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x07,0x02,0x7e]
+v_cvt_i32_f64 v1, v[2:3]
+
+// CHECK: v_cvt_f64_i32_e32 v[1:2], v2 ; encoding: [0x02,0x09,0x02,0x7e]
+v_cvt_f64_i32 v[1:2], v2
+
+// CHECK: v_cvt_f32_i32_e32 v1, v2 ; encoding: [0x02,0x0b,0x02,0x7e]
+v_cvt_f32_i32 v1, v2
+
+// CHECK: v_cvt_f32_u32_e32 v1, v2 ; encoding: [0x02,0x0d,0x02,0x7e]
+v_cvt_f32_u32 v1, v2
+
+// CHECK: v_cvt_u32_f32_e32 v1, v2 ; encoding: [0x02,0x0f,0x02,0x7e]
+v_cvt_u32_f32 v1, v2
+
+// CHECK: v_cvt_i32_f32_e32 v1, v2 ; encoding: [0x02,0x11,0x02,0x7e]
+v_cvt_i32_f32 v1, v2
+
+// CHECK: v_mov_fed_b32_e32 v1, v2 ; encoding: [0x02,0x13,0x02,0x7e]
+v_mov_fed_b32 v1, v2
+
+// CHECK: v_cvt_f16_f32_e32 v1, v2 ; encoding: [0x02,0x15,0x02,0x7e]
+v_cvt_f16_f32 v1, v2
+
+// CHECK: v_cvt_f32_f16_e32 v1, v2 ; encoding: [0x02,0x17,0x02,0x7e]
+v_cvt_f32_f16 v1, v2
+
+// CHECK: v_cvt_rpi_i32_f32_e32 v1, v2 ; encoding: [0x02,0x19,0x02,0x7e]
+v_cvt_rpi_i32_f32 v1, v2
+
+// CHECK: v_cvt_flr_i32_f32_e32 v1, v2 ; encoding: [0x02,0x1b,0x02,0x7e]
+v_cvt_flr_i32_f32 v1, v2
+
+// CHECK: v_cvt_off_f32_i4_e32 v1, v2 ; encoding: [0x02,0x1d,0x02,0x7e]
+v_cvt_off_f32_i4_e32 v1, v2
+
+// CHECK: v_cvt_f32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x1f,0x02,0x7e]
+v_cvt_f32_f64 v1, v[2:3]
+
+// CHECK: v_cvt_f64_f32_e32 v[1:2], v2 ; encoding: [0x02,0x21,0x02,0x7e]
+v_cvt_f64_f32 v[1:2], v2
+
+// CHECK: v_cvt_f32_ubyte0_e32 v1, v2 ; encoding: [0x02,0x23,0x02,0x7e]
+v_cvt_f32_ubyte0 v1, v2
+
+// CHECK: v_cvt_f32_ubyte1_e32 v1, v2 ; encoding: [0x02,0x25,0x02,0x7e]
+v_cvt_f32_ubyte1_e32 v1, v2
+
+// CHECK: v_cvt_f32_ubyte2_e32 v1, v2 ; encoding: [0x02,0x27,0x02,0x7e]
+v_cvt_f32_ubyte2 v1, v2
+
+// CHECK: v_cvt_f32_ubyte3_e32 v1, v2 ; encoding: [0x02,0x29,0x02,0x7e]
+v_cvt_f32_ubyte3 v1, v2
+
+// CHECK: v_cvt_u32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x2b,0x02,0x7e]
+v_cvt_u32_f64 v1, v[2:3]
+
+// CHECK: v_cvt_f64_u32_e32 v[1:2], v2 ; encoding: [0x02,0x2d,0x02,0x7e]
+v_cvt_f64_u32 v[1:2], v2
+
+// CHECK: v_fract_f32_e32 v1, v2 ; encoding: [0x02,0x41,0x02,0x7e]
+v_fract_f32 v1, v2
+
+// CHECK: v_trunc_f32_e32 v1, v2 ; encoding: [0x02,0x43,0x02,0x7e]
+v_trunc_f32 v1, v2
+
+// CHECK: v_ceil_f32_e32 v1, v2 ; encoding: [0x02,0x45,0x02,0x7e]
+v_ceil_f32 v1, v2
+
+// CHECK: v_rndne_f32_e32 v1, v2 ; encoding: [0x02,0x47,0x02,0x7e]
+v_rndne_f32 v1, v2
+
+// CHECK: v_floor_f32_e32 v1, v2 ; encoding: [0x02,0x49,0x02,0x7e]
+v_floor_f32_e32 v1, v2
+
+// CHECK: v_exp_f32_e32 v1, v2 ; encoding: [0x02,0x4b,0x02,0x7e]
+v_exp_f32 v1, v2
+
+// CHECK: v_log_clamp_f32_e32 v1, v2 ; encoding: [0x02,0x4d,0x02,0x7e]
+v_log_clamp_f32 v1, v2
+
+// CHECK: v_log_f32_e32 v1, v2 ; encoding: [0x02,0x4f,0x02,0x7e]
+v_log_f32 v1, v2
+
+// CHECK: v_rcp_clamp_f32_e32 v1, v2 ; encoding: [0x02,0x51,0x02,0x7e]
+v_rcp_clamp_f32 v1, v2
+
+// CHECK: v_rcp_legacy_f32_e32 v1, v2 ; encoding: [0x02,0x53,0x02,0x7e]
+v_rcp_legacy_f32 v1, v2
+
+// CHECK: v_rcp_f32_e32 v1, v2 ; encoding: [0x02,0x55,0x02,0x7e]
+v_rcp_f32 v1, v2
+
+// CHECK: v_rcp_iflag_f32_e32 v1, v2 ; encoding: [0x02,0x57,0x02,0x7e]
+v_rcp_iflag_f32 v1, v2
+
+// CHECK: v_rsq_clamp_f32_e32 v1, v2 ; encoding: [0x02,0x59,0x02,0x7e]
+v_rsq_clamp_f32 v1, v2
+
+// CHECK: v_rsq_legacy_f32_e32 v1, v2 ; encoding: [0x02,0x5b,0x02,0x7e]
+v_rsq_legacy_f32 v1, v2
+
+// CHECK: v_rsq_f32_e32 v1, v2 ; encoding: [0x02,0x5d,0x02,0x7e]
+v_rsq_f32_e32 v1, v2
+
+// CHECK: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x5f,0x02,0x7e]
+v_rcp_f64 v[1:2], v[2:3]
+
+// CHECK: v_rcp_clamp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x61,0x02,0x7e]
+v_rcp_clamp_f64 v[1:2], v[2:3]
+
+// CHECK: v_rsq_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x63,0x02,0x7e]
+v_rsq_f64 v[1:2], v[2:3]
+
+// CHECK: v_rsq_clamp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x65,0x02,0x7e]
+v_rsq_clamp_f64 v[1:2], v[2:3]
+
+// CHECK: v_sqrt_f32_e32 v1, v2 ; encoding: [0x02,0x67,0x02,0x7e]
+v_sqrt_f32 v1, v2
+
+// CHECK: v_sqrt_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x69,0x02,0x7e]
+v_sqrt_f64 v[1:2], v[2:3]
+
+// CHECK: v_sin_f32_e32 v1, v2 ; encoding: [0x02,0x6b,0x02,0x7e]
+v_sin_f32 v1, v2
+
+// CHECK: v_cos_f32_e32 v1, v2 ; encoding: [0x02,0x6d,0x02,0x7e]
+v_cos_f32 v1, v2
+
+// CHECK: v_not_b32_e32 v1, v2 ; encoding: [0x02,0x6f,0x02,0x7e]
+v_not_b32 v1, v2
+
+// CHECK: v_bfrev_b32_e32 v1, v2 ; encoding: [0x02,0x71,0x02,0x7e]
+v_bfrev_b32 v1, v2
+
+// CHECK: v_ffbh_u32_e32 v1, v2 ; encoding: [0x02,0x73,0x02,0x7e]
+v_ffbh_u32 v1, v2
+
+// CHECK: v_ffbl_b32_e32 v1, v2 ; encoding: [0x02,0x75,0x02,0x7e]
+v_ffbl_b32 v1, v2
+
+// CHECK: v_ffbh_i32_e32 v1, v2 ; encoding: [0x02,0x77,0x02,0x7e]
+v_ffbh_i32_e32 v1, v2
+
+// CHECK: v_frexp_exp_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x79,0x02,0x7e]
+v_frexp_exp_i32_f64 v1, v[2:3]
+
+// CHECK: v_frexp_mant_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x7b,0x02,0x7e]
+v_frexp_mant_f64 v[1:2], v[2:3]
+
+// CHECK: v_fract_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x7d,0x02,0x7e]
+v_fract_f64 v[1:2], v[2:3]
+
+// CHECK: v_frexp_exp_i32_f32_e32 v1, v2 ; encoding: [0x02,0x7f,0x02,0x7e]
+v_frexp_exp_i32_f32 v1, v2
+
+// CHECK: v_frexp_mant_f32_e32 v1, v2 ; encoding: [0x02,0x81,0x02,0x7e]
+v_frexp_mant_f32 v1, v2
+
+// CHECK: v_clrexcp ; encoding: [0x00,0x82,0x00,0x7e]
+v_clrexcp
+
+// CHECK: v_movreld_b32_e32 v1, v2 ; encoding: [0x02,0x85,0x02,0x7e]
+v_movreld_b32 v1, v2
+
+// CHECK: v_movrels_b32_e32 v1, v2 ; encoding: [0x02,0x87,0x02,0x7e]
+v_movrels_b32 v1, v2
+
+// CHECK: v_movrelsd_b32_e32 v1, v2 ; encoding: [0x02,0x89,0x02,0x7e]
+v_movrelsd_b32 v1, v2
diff --git a/test/MC/R600/vop2-err.s b/test/MC/R600/vop2-err.s
new file mode 100644
index 0000000..a113100
--- /dev/null
+++ b/test/MC/R600/vop2-err.s
@@ -0,0 +1,35 @@
+// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// Generic checks
+//===----------------------------------------------------------------------===//
+
+v_mul_i32_i24 v1, v2, 100
+// CHECK: error: invalid operand for instruction
+
+//===----------------------------------------------------------------------===//
+// _e32 checks
+//===----------------------------------------------------------------------===//
+
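+// Note: the 32-bit VOP2 encoding only has room for a VGPR in src1, so an
+// immediate or an SGPR in that position cannot be encoded and is rejected.
+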
+// Immediate src1
+v_mul_i32_i24_e32 v1, v2, 100
+// CHECK: error: invalid operand for instruction
+
+// sgpr src1
+v_mul_i32_i24_e32 v1, v2, s3
+// CHECK: error: invalid operand for instruction
+
+//===----------------------------------------------------------------------===//
+// _e64 checks
+//===----------------------------------------------------------------------===//
+
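+// Note: the 64-bit (VOP3) encoding has no literal-constant slot, so 100
+// (outside the inline-constant range) is rejected in either source position,
+// even though small inline constants are accepted there.
+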
+// Immediate src0
+v_mul_i32_i24_e64 v1, 100, v3
+// CHECK: error: invalid operand for instruction
+
+// Immediate src1
+v_mul_i32_i24_e64 v1, v2, 100
+// CHECK: error: invalid operand for instruction
+
+// TODO: Constant bus restrictions
diff --git a/test/MC/R600/vop2.s b/test/MC/R600/vop2.s
new file mode 100644
index 0000000..6780088
--- /dev/null
+++ b/test/MC/R600/vop2.s
@@ -0,0 +1,242 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// Generic Checks for floating-point instructions (These have modifiers).
+//===----------------------------------------------------------------------===//
+
+// TODO: 64-bit encoding of instructions with modifiers
+
+// _e32 suffix
+// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06]
+v_add_f32_e32 v1, v2, v3
+
+// src0 inline immediate
+// CHECK: v_add_f32_e32 v1, 1.0, v3 ; encoding: [0xf2,0x06,0x02,0x06]
+v_add_f32 v1, 1.0, v3
+
+// src0 negative inline immediate
+// CHECK: v_add_f32_e32 v1, -1.0, v3 ; encoding: [0xf3,0x06,0x02,0x06]
+v_add_f32 v1, -1.0, v3
+
+// src0 literal
+// CHECK: v_add_f32_e32 v1, 0x42c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0x42]
+v_add_f32 v1, 100.0, v3
+
+// src0 negative literal
+// CHECK: v_add_f32_e32 v1, 0xc2c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0xc2]
+v_add_f32 v1, -100.0, v3
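+// Note: 100.0 and -100.0 are not inline constants, so src0 becomes the
+// literal selector (0xff) and the IEEE-754 bit pattern (0x42c80000 /
+// 0xc2c80000) is emitted as an extra dword after the instruction word.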
+
+//===----------------------------------------------------------------------===//
+// Generic Checks for integer instructions (These don't have modifiers).
+//===----------------------------------------------------------------------===//
+
+// _e32 suffix
+// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12]
+v_mul_i32_i24_e32 v1, v2, v3
+
+// _e64 suffix
+// CHECK: v_mul_i32_i24_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x02,0x00]
+v_mul_i32_i24_e64 v1, v2, v3
+
+// src0 inline
+// CHECK: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x12]
+v_mul_i32_i24 v1, 3, v3
+
+// src0 negative inline
+// CHECK: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x12]
+v_mul_i32_i24 v1, -3, v3
+
+// src1 inline
+// CHECK: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x01,0x00]
+v_mul_i32_i24 v1, v2, 3
+
+// src1 negative inline
+// CHECK: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x87,0x01,0x00]
+v_mul_i32_i24 v1, v2, -3
+
+// src0 literal
+// CHECK: v_mul_i32_i24_e32 v1, 0x64, v3 ; encoding: [0xff,0x06,0x02,0x12,0x64,0x00,0x00,0x00]
+v_mul_i32_i24 v1, 100, v3
+
+// src1 negative literal
+// CHECK: v_mul_i32_i24_e32 v1, 0xffffff9c, v3 ; encoding: [0xff,0x06,0x02,0x12,0x9c,0xff,0xff,0xff]
+v_mul_i32_i24 v1, -100, v3
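+// Note: integer values in the inline-constant range (-16..64) encode directly
+// in the source field, as with 3 and -3 above; 100 and -100 fall back to a
+// trailing 32-bit literal.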
+
+//===----------------------------------------------------------------------===//
+// Checks for legal operands
+//===----------------------------------------------------------------------===//
+
+// src0 sgpr
+// CHECK: v_mul_i32_i24_e32 v1, s2, v3 ; encoding: [0x02,0x06,0x02,0x12]
+v_mul_i32_i24 v1, s2, v3
+
+// src1 sgpr
+// CHECK: v_mul_i32_i24_e64 v1, v2, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x00,0x00]
+v_mul_i32_i24 v1, v2, s3
+
+// src0, src1 same sgpr
+// CHECK: v_mul_i32_i24_e64 v1, s2, s2 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x04,0x00,0x00]
+v_mul_i32_i24 v1, s2, s2
+
+// src0 sgpr, src1 inline
+// CHECK: v_mul_i32_i24_e64 v1, s2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x06,0x01,0x00]
+v_mul_i32_i24 v1, s2, 3
+
+// src0 inline src1 sgpr
+// CHECK: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x83,0x06,0x00,0x00]
+v_mul_i32_i24 v1, 3, s3
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// CHECK: v_cndmask_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x00]
+v_cndmask_b32 v1, v2, v3
+
+// CHECK: v_readlane_b32 s1, v2, s3 ; encoding: [0x02,0x07,0x02,0x02]
+v_readlane_b32 s1, v2, s3
+
+// CHECK: v_writelane_b32 v1, s2, s3 ; encoding: [0x02,0x06,0x02,0x04]
+v_writelane_b32 v1, s2, s3
+
+// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06]
+v_add_f32 v1, v2, v3
+
+// CHECK: v_sub_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x08]
+v_sub_f32 v1, v2, v3
+
+// CHECK: v_subrev_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0a]
+v_subrev_f32 v1, v2, v3
+
+// CHECK: v_mac_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c]
+v_mac_legacy_f32 v1, v2, v3
+
+// CHECK: v_mul_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e]
+v_mul_legacy_f32_e32 v1, v2, v3
+
+// CHECK: v_mul_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10]
+v_mul_f32 v1, v2, v3
+
+// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12]
+v_mul_i32_i24 v1, v2, v3
+
+// CHECK: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x14]
+v_mul_hi_i32_i24 v1, v2, v3
+
+// CHECK: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x16]
+v_mul_u32_u24 v1, v2, v3
+
+// CHECK: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18]
+v_mul_hi_u32_u24 v1, v2, v3
+
+// CHECK: v_min_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a]
+v_min_legacy_f32_e32 v1, v2, v3
+
+// CHECK: v_max_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c]
+v_max_legacy_f32 v1, v2, v3
+
+// CHECK: v_min_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e]
+v_min_f32_e32 v1, v2, v3
+
+// CHECK: v_max_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x20]
+v_max_f32 v1, v2, v3
+
+// CHECK: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22]
+v_min_i32 v1, v2, v3
+
+// CHECK: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24]
+v_max_i32 v1, v2, v3
+
+// CHECK: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26]
+v_min_u32 v1, v2, v3
+
+// CHECK: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28]
+v_max_u32 v1, v2, v3
+
+// CHECK: v_lshr_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a]
+v_lshr_b32 v1, v2, v3
+
+// CHECK: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c]
+v_lshrrev_b32 v1, v2, v3
+
+// CHECK: v_ashr_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2e]
+v_ashr_i32 v1, v2, v3
+
+// CHECK: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x30]
+v_ashrrev_i32 v1, v2, v3
+
+// CHECK: v_lshl_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+v_lshl_b32_e32 v1, v2, v3
+
+// CHECK: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+v_lshlrev_b32 v1, v2, v3
+
+// CHECK: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+v_and_b32 v1, v2, v3
+
+// CHECK: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
+v_or_b32 v1, v2, v3
+
+// CHECK: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
+v_xor_b32 v1, v2, v3
+
+// CHECK: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
+v_bfm_b32 v1, v2, v3
+
+// CHECK: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e]
+v_mac_f32 v1, v2, v3
+
+// CHECK: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42]
+v_madmk_f32 v1, v2, v3, 64.0
+
+// CHECK: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42]
+v_madak_f32 v1, v2, v3, 64.0
+
+// CHECK: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44]
+v_bcnt_u32_b32 v1, v2, v3
+
+// CHECK: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46]
+v_mbcnt_lo_u32_b32 v1, v2, v3
+
+// CHECK: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48]
+v_mbcnt_hi_u32_b32_e32 v1, v2, v3
+
+// CHECK: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
+v_add_i32 v1, v2, v3
+
+// CHECK: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
+v_sub_i32_e32 v1, v2, v3
+
+// CHECK: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
+v_subrev_i32 v1, v2, v3
+
+// CHECK: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
+v_addc_u32 v1, v2, v3
+
+// CHECK: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
+v_subb_u32 v1, v2, v3
+
+// CHECK: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
+v_subbrev_u32 v1, v2, v3
+
+// CHECK: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
+v_ldexp_f32 v1, v2, v3
+
+// CHECK: v_cvt_pkaccum_u8_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58]
+v_cvt_pkaccum_u8_f32 v1, v2, v3
+
+// CHECK: v_cvt_pknorm_i16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a]
+v_cvt_pknorm_i16_f32 v1, v2, v3
+
+// CHECK: v_cvt_pknorm_u16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c]
+v_cvt_pknorm_u16_f32 v1, v2, v3
+
+// CHECK: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e]
+v_cvt_pkrtz_f16_f32 v1, v2, v3
+
+// CHECK: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60]
+v_cvt_pk_u16_u32_e32 v1, v2, v3
+
+// CHECK: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62]
+v_cvt_pk_i16_i32 v1, v2, v3
diff --git a/test/MC/R600/vop3.s b/test/MC/R600/vop3.s
new file mode 100644
index 0000000..7d1ba0b
--- /dev/null
+++ b/test/MC/R600/vop3.s
@@ -0,0 +1,138 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// VOPC Instructions
+//===----------------------------------------------------------------------===//
+
+//
+// Modifier tests:
+//
+
+v_cmp_lt_f32 s[2:3] -v4, v6
+// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x20]
+
+v_cmp_lt_f32 s[2:3] v4, -v6
+// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
+
+v_cmp_lt_f32 s[2:3] -v4, -v6
+// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x60]
+
+v_cmp_lt_f32 s[2:3] |v4|, v6
+// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x00]
+
+v_cmp_lt_f32 s[2:3] v4, |v6|
+// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x00]
+
+v_cmp_lt_f32 s[2:3] |v4|, |v6|
+// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x00]
+
+v_cmp_lt_f32 s[2:3] -|v4|, v6
+// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x20]
+
+v_cmp_lt_f32 s[2:3] v4, -|v6|
+// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x40]
+
+v_cmp_lt_f32 s[2:3] -|v4|, -|v6|
+// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x60]
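+// Note: in the encodings above the abs flags show up in the second byte
+// (0x01/0x02/0x03) and the neg flags in the upper bits of the final byte
+// (0x20/0x40/0x60).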
+
+//
+// Instruction tests:
+//
+
+v_cmp_f_f32 s[2:3], v4, v6
+// CHECK: v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x00,0xd0,0x04,0x0d,0x02,0x00]
+
+v_cmp_lt_f32 s[2:3], v4, v6
+// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x00]
+
+v_cmp_eq_f32 s[2:3], v4, v6
+// CHECK: v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x04,0xd0,0x04,0x0d,0x02,0x00]
+
+v_cmp_le_f32 s[2:3], v4, v6
+// CHECK: v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x06,0xd0,0x04,0x0d,0x02,0x00]
+
+v_cmp_gt_f32 s[2:3], v4, v6
+// CHECK: v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x08,0xd0,0x04,0x0d,0x02,0x00]
+
+v_cmp_lg_f32 s[2:3], v4, v6
+// CHECK: v_cmp_lg_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0a,0xd0,0x04,0x0d,0x02,0x00]
+
+v_cmp_ge_f32 s[2:3], v4, v6
+// CHECK: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0c,0xd0,0x04,0x0d,0x02,0x00]
+
+// TODO: Finish VOPC
+
+//===----------------------------------------------------------------------===//
+// VOP1 Instructions
+//===----------------------------------------------------------------------===//
+
+//
+// Modifier tests:
+//
+
+v_fract_f32 v1, -v2
+// CHECK: v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x20]
+
+v_fract_f32 v1, |v2|
+// CHECK: v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x00]
+
+v_fract_f32 v1, -|v2|
+// CHECK: v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x20]
+
+v_fract_f32 v1, v2 clamp
+// CHECK: v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x00]
+
+v_fract_f32 v1, v2 mul:2
+// CHECK: v_fract_f32_e64 v1, v2 mul:2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x08]
+
+v_fract_f32 v1, v2, div:2 clamp
+// CHECK: v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x18]
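+// Note: "clamp" sets bit 3 of the second encoding byte (0x08) and the output
+// modifiers mul:/div: select the omod value in the final byte, as the
+// encodings above show.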
+
+// TODO: Finish VOP1
+
+//===----------------------------------------------------------------------===//
+// VOP2 Instructions
+//===----------------------------------------------------------------------===//
+
+// TODO: Modifier tests
+
+v_cndmask_b32 v1, v3, v5, s[4:5]
+// CHECK: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
+
+//TODO: readlane, writelane
+
+v_add_f32 v1, v3, s5
+// CHECK: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x00,0x00]
+
+v_sub_f32 v1, v3, s5
+// CHECK: v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x08,0xd2,0x03,0x0b,0x00,0x00]
+
+v_subrev_f32 v1, v3, s5
+// CHECK: v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0a,0xd2,0x03,0x0b,0x00,0x00]
+
+v_mac_legacy_f32 v1, v3, s5
+// CHECK: v_mac_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0c,0xd2,0x03,0x0b,0x00,0x00]
+
+v_mul_legacy_f32 v1, v3, s5
+// CHECK: v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0e,0xd2,0x03,0x0b,0x00,0x00]
+
+v_mul_f32 v1, v3, s5
+// CHECK: v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x10,0xd2,0x03,0x0b,0x00,0x00]
+
+v_mul_i32_i24 v1, v3, s5
+// CHECK: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x12,0xd2,0x03,0x0b,0x00,0x00]
+
+//===----------------------------------------------------------------------===//
+// VOP3 Instructions
+//===----------------------------------------------------------------------===//
+
+// TODO: Modifier tests
+
+v_mad_legacy_f32 v2, v4, v6, v8
+// CHECK: v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0x80,0xd2,0x04,0x0d,0x22,0x04]
+
+
+
+
+
diff --git a/test/MC/R600/vopc.s b/test/MC/R600/vopc.s
new file mode 100644
index 0000000..f44919a
--- /dev/null
+++ b/test/MC/R600/vopc.s
@@ -0,0 +1,40 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+
+//===----------------------------------------------------------------------===//
+// Generic Checks
+//===----------------------------------------------------------------------===//
+
+// src0 sgpr
+v_cmp_lt_f32 vcc, s2, v4
+// CHECK: v_cmp_lt_f32_e32 vcc, s2, v4 ; encoding: [0x02,0x08,0x02,0x7c]
+
+// src0 inline immediate
+v_cmp_lt_f32 vcc, 0, v4
+// CHECK: v_cmp_lt_f32_e32 vcc, 0, v4 ; encoding: [0x80,0x08,0x02,0x7c]
+
+// src0 literal
+v_cmp_lt_f32 vcc, 10.0, v4
+// CHECK: v_cmp_lt_f32_e32 vcc, 0x41200000, v4 ; encoding: [0xff,0x08,0x02,0x7c,0x00,0x00,0x20,0x41]
+
+// src0, src1 max vgpr
+v_cmp_lt_f32 vcc, v255, v255
+// CHECK: v_cmp_lt_f32_e32 vcc, v255, v255 ; encoding: [0xff,0xff,0x03,0x7c]
+
+// force 32-bit encoding
+v_cmp_lt_f32_e32 vcc, v2, v4
+// CHECK: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
+
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+v_cmp_f_f32 vcc, v2, v4
+// CHECK: v_cmp_f_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x00,0x7c]
+
+v_cmp_lt_f32 vcc, v2, v4
+// CHECK: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
+
+// TODO: Add tests for the rest of the instructions.
+
diff --git a/test/MC/SystemZ/insn-bad-z196.s b/test/MC/SystemZ/insn-bad-z196.s
index 47dbe08..1bd70a8 100644
--- a/test/MC/SystemZ/insn-bad-z196.s
+++ b/test/MC/SystemZ/insn-bad-z196.s
@@ -244,6 +244,11 @@
cxlgbr %f0, 16, %r0, 0
cxlgbr %f2, 0, %r0, 0
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: etnd %r7
+
+ etnd %r7
+
#CHECK: error: invalid operand
#CHECK: fidbra %f0, 0, %f0, -1
#CHECK: error: invalid operand
@@ -546,6 +551,21 @@
locr %r0,%r0,-1
locr %r0,%r0,16
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: ntstg %r0, 524287(%r1,%r15)
+
+ ntstg %r0, 524287(%r1,%r15)
+
+#CHECK: error: {{(instruction requires: processor-assist)?}}
+#CHECK: ppa %r4, %r6, 7
+
+ ppa %r4, %r6, 7
+
+#CHECK: error: {{(instruction requires: miscellaneous-extensions)?}}
+#CHECK: risbgn %r1, %r2, 0, 0, 0
+
+ risbgn %r1, %r2, 0, 0, 0
+
#CHECK: error: invalid operand
#CHECK: risbhg %r0,%r0,0,0,-1
#CHECK: error: invalid operand
@@ -685,3 +705,24 @@
stocg %r0,-524289,1
stocg %r0,524288,1
stocg %r0,0(%r1,%r2),1
+
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: tabort 4095(%r1)
+
+ tabort 4095(%r1)
+
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: tbegin 4095(%r1), 42
+
+ tbegin 4095(%r1), 42
+
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: tbeginc 4095(%r1), 42
+
+ tbeginc 4095(%r1), 42
+
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: tend
+
+ tend
+
diff --git a/test/MC/SystemZ/insn-bad-zEC12.s b/test/MC/SystemZ/insn-bad-zEC12.s
new file mode 100644
index 0000000..d96e35d
--- /dev/null
+++ b/test/MC/SystemZ/insn-bad-zEC12.s
@@ -0,0 +1,84 @@
+# For zEC12 only.
+# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=zEC12 < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ntstg %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ntstg %r0, 524288
+
+ ntstg %r0, -524289
+ ntstg %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: ppa %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: ppa %r0, %r0, 16
+
+ ppa %r0, %r0, -1
+ ppa %r0, %r0, 16
+
+#CHECK: error: invalid operand
+#CHECK: risbgn %r0,%r0,0,0,-1
+#CHECK: error: invalid operand
+#CHECK: risbgn %r0,%r0,0,0,64
+#CHECK: error: invalid operand
+#CHECK: risbgn %r0,%r0,0,-1,0
+#CHECK: error: invalid operand
+#CHECK: risbgn %r0,%r0,0,256,0
+#CHECK: error: invalid operand
+#CHECK: risbgn %r0,%r0,-1,0,0
+#CHECK: error: invalid operand
+#CHECK: risbgn %r0,%r0,256,0,0
+
+ risbgn %r0,%r0,0,0,-1
+ risbgn %r0,%r0,0,0,64
+ risbgn %r0,%r0,0,-1,0
+ risbgn %r0,%r0,0,256,0
+ risbgn %r0,%r0,-1,0,0
+ risbgn %r0,%r0,256,0,0
+
+#CHECK: error: invalid operand
+#CHECK: tabort -1
+#CHECK: error: invalid operand
+#CHECK: tabort 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tabort 0(%r1,%r2)
+
+ tabort -1
+ tabort 4096
+ tabort 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: tbegin -1, 0
+#CHECK: error: invalid operand
+#CHECK: tbegin 4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tbegin 0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: tbegin 0, -1
+#CHECK: error: invalid operand
+#CHECK: tbegin 0, 65536
+
+ tbegin -1, 0
+ tbegin 4096, 0
+ tbegin 0(%r1,%r2), 0
+ tbegin 0, -1
+ tbegin 0, 65536
+
+#CHECK: error: invalid operand
+#CHECK: tbeginc -1, 0
+#CHECK: error: invalid operand
+#CHECK: tbeginc 4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tbeginc 0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: tbeginc 0, -1
+#CHECK: error: invalid operand
+#CHECK: tbeginc 0, 65536
+
+ tbeginc -1, 0
+ tbeginc 4096, 0
+ tbeginc 0(%r1,%r2), 0
+ tbeginc 0, -1
+ tbeginc 0, 65536
diff --git a/test/MC/SystemZ/insn-bad.s b/test/MC/SystemZ/insn-bad.s
index a08cb34..0410a41 100644
--- a/test/MC/SystemZ/insn-bad.s
+++ b/test/MC/SystemZ/insn-bad.s
@@ -2666,6 +2666,11 @@
pfdrl 1, 1
pfdrl 1, 0x100000000
+#CHECK: error: {{(instruction requires: population-count)?}}
+#CHECK: popcnt %r0, %r0
+
+ popcnt %r0, %r0
+
#CHECK: error: invalid operand
#CHECK: risbg %r0,%r0,0,0,-1
#CHECK: error: invalid operand
diff --git a/test/MC/SystemZ/insn-good-z196.s b/test/MC/SystemZ/insn-good-z196.s
index db5ecdd..36bea38 100644
--- a/test/MC/SystemZ/insn-good-z196.s
+++ b/test/MC/SystemZ/insn-good-z196.s
@@ -1021,6 +1021,16 @@
ork %r15,%r0,%r0
ork %r7,%r8,%r9
+#CHECK: popcnt %r0, %r0 # encoding: [0xb9,0xe1,0x00,0x00]
+#CHECK: popcnt %r0, %r15 # encoding: [0xb9,0xe1,0x00,0x0f]
+#CHECK: popcnt %r15, %r0 # encoding: [0xb9,0xe1,0x00,0xf0]
+#CHECK: popcnt %r7, %r8 # encoding: [0xb9,0xe1,0x00,0x78]
+
+ popcnt %r0,%r0
+ popcnt %r0,%r15
+ popcnt %r15,%r0
+ popcnt %r7,%r8
+
#CHECK: risbhg %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x5d]
#CHECK: risbhg %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x5d]
#CHECK: risbhg %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x5d]
diff --git a/test/MC/SystemZ/insn-good-zEC12.s b/test/MC/SystemZ/insn-good-zEC12.s
new file mode 100644
index 0000000..0610de3
--- /dev/null
+++ b/test/MC/SystemZ/insn-good-zEC12.s
@@ -0,0 +1,126 @@
+# For zEC12 and above.
+# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=zEC12 -show-encoding %s | FileCheck %s
+
+#CHECK: etnd %r0 # encoding: [0xb2,0xec,0x00,0x00]
+#CHECK: etnd %r15 # encoding: [0xb2,0xec,0x00,0xf0]
+#CHECK: etnd %r7 # encoding: [0xb2,0xec,0x00,0x70]
+
+ etnd %r0
+ etnd %r15
+ etnd %r7
+
+#CHECK: ntstg %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x25]
+#CHECK: ntstg %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x25]
+#CHECK: ntstg %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x25]
+#CHECK: ntstg %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x25]
+#CHECK: ntstg %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x25]
+#CHECK: ntstg %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x25]
+#CHECK: ntstg %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x25]
+#CHECK: ntstg %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x25]
+#CHECK: ntstg %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x25]
+#CHECK: ntstg %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x25]
+
+ ntstg %r0, -524288
+ ntstg %r0, -1
+ ntstg %r0, 0
+ ntstg %r0, 1
+ ntstg %r0, 524287
+ ntstg %r0, 0(%r1)
+ ntstg %r0, 0(%r15)
+ ntstg %r0, 524287(%r1,%r15)
+ ntstg %r0, 524287(%r15,%r1)
+ ntstg %r15, 0
+
+#CHECK: ppa %r0, %r0, 0 # encoding: [0xb2,0xe8,0x00,0x00]
+#CHECK: ppa %r0, %r0, 15 # encoding: [0xb2,0xe8,0xf0,0x00]
+#CHECK: ppa %r0, %r15, 0 # encoding: [0xb2,0xe8,0x00,0x0f]
+#CHECK: ppa %r4, %r6, 7 # encoding: [0xb2,0xe8,0x70,0x46]
+#CHECK: ppa %r15, %r0, 0 # encoding: [0xb2,0xe8,0x00,0xf0]
+
+ ppa %r0, %r0, 0
+ ppa %r0, %r0, 15
+ ppa %r0, %r15, 0
+ ppa %r4, %r6, 7
+ ppa %r15, %r0, 0
+
+#CHECK: risbgn %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x59]
+#CHECK: risbgn %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x59]
+#CHECK: risbgn %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x59]
+#CHECK: risbgn %r0, %r0, 255, 0, 0 # encoding: [0xec,0x00,0xff,0x00,0x00,0x59]
+#CHECK: risbgn %r0, %r15, 0, 0, 0 # encoding: [0xec,0x0f,0x00,0x00,0x00,0x59]
+#CHECK: risbgn %r15, %r0, 0, 0, 0 # encoding: [0xec,0xf0,0x00,0x00,0x00,0x59]
+#CHECK: risbgn %r4, %r5, 6, 7, 8 # encoding: [0xec,0x45,0x06,0x07,0x08,0x59]
+
+ risbgn %r0,%r0,0,0,0
+ risbgn %r0,%r0,0,0,63
+ risbgn %r0,%r0,0,255,0
+ risbgn %r0,%r0,255,0,0
+ risbgn %r0,%r15,0,0,0
+ risbgn %r15,%r0,0,0,0
+ risbgn %r4,%r5,6,7,8
+
+#CHECK: tabort 0 # encoding: [0xb2,0xfc,0x00,0x00]
+#CHECK: tabort 0(%r1) # encoding: [0xb2,0xfc,0x10,0x00]
+#CHECK: tabort 0(%r15) # encoding: [0xb2,0xfc,0xf0,0x00]
+#CHECK: tabort 4095 # encoding: [0xb2,0xfc,0x0f,0xff]
+#CHECK: tabort 4095(%r1) # encoding: [0xb2,0xfc,0x1f,0xff]
+#CHECK: tabort 4095(%r15) # encoding: [0xb2,0xfc,0xff,0xff]
+
+ tabort 0
+ tabort 0(%r1)
+ tabort 0(%r15)
+ tabort 4095
+ tabort 4095(%r1)
+ tabort 4095(%r15)
+
+#CHECK: tbegin 0, 0 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x00]
+#CHECK: tbegin 4095, 0 # encoding: [0xe5,0x60,0x0f,0xff,0x00,0x00]
+#CHECK: tbegin 0, 0 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x00]
+#CHECK: tbegin 0, 1 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x01]
+#CHECK: tbegin 0, 32767 # encoding: [0xe5,0x60,0x00,0x00,0x7f,0xff]
+#CHECK: tbegin 0, 32768 # encoding: [0xe5,0x60,0x00,0x00,0x80,0x00]
+#CHECK: tbegin 0, 65535 # encoding: [0xe5,0x60,0x00,0x00,0xff,0xff]
+#CHECK: tbegin 0(%r1), 42 # encoding: [0xe5,0x60,0x10,0x00,0x00,0x2a]
+#CHECK: tbegin 0(%r15), 42 # encoding: [0xe5,0x60,0xf0,0x00,0x00,0x2a]
+#CHECK: tbegin 4095(%r1), 42 # encoding: [0xe5,0x60,0x1f,0xff,0x00,0x2a]
+#CHECK: tbegin 4095(%r15), 42 # encoding: [0xe5,0x60,0xff,0xff,0x00,0x2a]
+
+ tbegin 0, 0
+ tbegin 4095, 0
+ tbegin 0, 0
+ tbegin 0, 1
+ tbegin 0, 32767
+ tbegin 0, 32768
+ tbegin 0, 65535
+ tbegin 0(%r1), 42
+ tbegin 0(%r15), 42
+ tbegin 4095(%r1), 42
+ tbegin 4095(%r15), 42
+
+#CHECK: tbeginc 0, 0 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x00]
+#CHECK: tbeginc 4095, 0 # encoding: [0xe5,0x61,0x0f,0xff,0x00,0x00]
+#CHECK: tbeginc 0, 0 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x00]
+#CHECK: tbeginc 0, 1 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x01]
+#CHECK: tbeginc 0, 32767 # encoding: [0xe5,0x61,0x00,0x00,0x7f,0xff]
+#CHECK: tbeginc 0, 32768 # encoding: [0xe5,0x61,0x00,0x00,0x80,0x00]
+#CHECK: tbeginc 0, 65535 # encoding: [0xe5,0x61,0x00,0x00,0xff,0xff]
+#CHECK: tbeginc 0(%r1), 42 # encoding: [0xe5,0x61,0x10,0x00,0x00,0x2a]
+#CHECK: tbeginc 0(%r15), 42 # encoding: [0xe5,0x61,0xf0,0x00,0x00,0x2a]
+#CHECK: tbeginc 4095(%r1), 42 # encoding: [0xe5,0x61,0x1f,0xff,0x00,0x2a]
+#CHECK: tbeginc 4095(%r15), 42 # encoding: [0xe5,0x61,0xff,0xff,0x00,0x2a]
+
+ tbeginc 0, 0
+ tbeginc 4095, 0
+ tbeginc 0, 0
+ tbeginc 0, 1
+ tbeginc 0, 32767
+ tbeginc 0, 32768
+ tbeginc 0, 65535
+ tbeginc 0(%r1), 42
+ tbeginc 0(%r15), 42
+ tbeginc 4095(%r1), 42
+ tbeginc 4095(%r15), 42
+
+#CHECK: tend # encoding: [0xb2,0xf8,0x00,0x00]
+
+ tend
diff --git a/test/MC/X86/AlignedBundling/bundle-group-too-large-error.s b/test/MC/X86/AlignedBundling/bundle-group-too-large-error.s
index a9a78a7..5ce7880 100644
--- a/test/MC/X86/AlignedBundling/bundle-group-too-large-error.s
+++ b/test/MC/X86/AlignedBundling/bundle-group-too-large-error.s
@@ -1,4 +1,5 @@
# RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+# RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - 2>&1 | FileCheck %s
# CHECK: ERROR: Fragment can't be larger than a bundle size
diff --git a/test/MC/X86/AlignedBundling/different-sections.s b/test/MC/X86/AlignedBundling/different-sections.s
index 3e9fcf3..e121532 100644
--- a/test/MC/X86/AlignedBundling/different-sections.s
+++ b/test/MC/X86/AlignedBundling/different-sections.s
@@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# Test two different executable sections with bundling.
diff --git a/test/MC/X86/AlignedBundling/labeloffset.s b/test/MC/X86/AlignedBundling/labeloffset.s
index 65a0086..5b2efe0 100644
--- a/test/MC/X86/AlignedBundling/labeloffset.s
+++ b/test/MC/X86/AlignedBundling/labeloffset.s
@@ -2,6 +2,8 @@
# RUN: llvm-objdump -disassemble -no-show-raw-insn -r - | FileCheck %s
# RUN: llvm-mc -triple=i686-nacl -filetype=obj %s -o - | \
# RUN: llvm-objdump -disassemble -no-show-raw-insn -r - | FileCheck %s
+# RUN: llvm-mc -triple=i686-nacl -filetype=obj -mc-relax-all %s -o - | \
+# RUN: llvm-objdump -disassemble -no-show-raw-insn -r - | FileCheck %s
.bundle_align_mode 5
.text
diff --git a/test/MC/X86/AlignedBundling/long-nop-pad.s b/test/MC/X86/AlignedBundling/long-nop-pad.s
index 9b1ec11..36e4f4b 100644
--- a/test/MC/X86/AlignedBundling/long-nop-pad.s
+++ b/test/MC/X86/AlignedBundling/long-nop-pad.s
@@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# Test that long nops are generated for padding where possible.
diff --git a/test/MC/X86/AlignedBundling/nesting.s b/test/MC/X86/AlignedBundling/nesting.s
index 8996170..74b8fe9 100644
--- a/test/MC/X86/AlignedBundling/nesting.s
+++ b/test/MC/X86/AlignedBundling/nesting.s
@@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# Will be bundle-aligning to 16 byte boundaries
.bundle_align_mode 4
diff --git a/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s b/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s
index 6ca4046..158cde8 100644
--- a/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s
+++ b/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s
@@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# Test some variations of padding to the end of a bundle.
diff --git a/test/MC/X86/AlignedBundling/pad-bundle-groups.s b/test/MC/X86/AlignedBundling/pad-bundle-groups.s
index b65ee7a..7a9e30c 100644
--- a/test/MC/X86/AlignedBundling/pad-bundle-groups.s
+++ b/test/MC/X86/AlignedBundling/pad-bundle-groups.s
@@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# Test some variations of padding for bundle-locked groups.
diff --git a/test/MC/X86/AlignedBundling/relax-at-bundle-end.s b/test/MC/X86/AlignedBundling/relax-at-bundle-end.s
index ab4affb..f59ecb0 100644
--- a/test/MC/X86/AlignedBundling/relax-at-bundle-end.s
+++ b/test/MC/X86/AlignedBundling/relax-at-bundle-end.s
@@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
# Test that an instruction near a bundle end gets properly padded
# after it is relaxed.
diff --git a/test/MC/X86/AlignedBundling/relax-in-bundle-group.s b/test/MC/X86/AlignedBundling/relax-in-bundle-group.s
index 0a99bb5..d076190 100644
--- a/test/MC/X86/AlignedBundling/relax-in-bundle-group.s
+++ b/test/MC/X86/AlignedBundling/relax-in-bundle-group.s
@@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
# RUN: | llvm-objdump -disassemble - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
+# RUN: | llvm-objdump -disassemble - | FileCheck %s
# Test that instructions inside bundle-locked groups are relaxed even if their
# fixup is short enough not to warrant relaxation on its own.
diff --git a/test/MC/X86/AlignedBundling/single-inst-bundling.s b/test/MC/X86/AlignedBundling/single-inst-bundling.s
index c0275f4..a7df2c9 100644
--- a/test/MC/X86/AlignedBundling/single-inst-bundling.s
+++ b/test/MC/X86/AlignedBundling/single-inst-bundling.s
@@ -1,5 +1,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
-# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -mc-relax-all %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s
# Test simple NOP insertion for single instructions.
@@ -24,14 +26,17 @@ foo:
movl %ebx, %edi
callq bar
cmpl %r14d, %ebp
+# CHECK-RELAX: nopl
jle .L_ELSE
# Due to the padding that's inserted before the addl, the jump target
# becomes farther by one byte.
-# CHECK: jle 5
+# CHECK-OPT: jle 5
+# CHECK-RELAX: jle 7
addl %ebp, %eax
-# CHECK: nop
-# CHECK-NEXT: 20: addl
+# CHECK-OPT: nop
+# CHECK-OPT-NEXT:20: addl
+# CHECK-RELAX: 26: addl
jmp .L_RET
.L_ELSE:
diff --git a/test/MC/X86/expand-var.s b/test/MC/X86/expand-var.s
index ef62d8a..8d5529a 100644
--- a/test/MC/X86/expand-var.s
+++ b/test/MC/X86/expand-var.s
@@ -1,6 +1,6 @@
// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux < %s | llvm-readobj -r | FileCheck %s
-// CHECK: Section (2) .rela.text {
+// CHECK: Section {{.*}} .rela.text {
// CHECK-NEXT: 0x0 R_X86_64_32 d 0x0
// CHECK-NEXT: }
diff --git a/test/MC/X86/reloc-undef-global.s b/test/MC/X86/reloc-undef-global.s
index a4854d4..24de904 100644
--- a/test/MC/X86/reloc-undef-global.s
+++ b/test/MC/X86/reloc-undef-global.s
@@ -7,7 +7,7 @@ bar = foo + 4
.long bar
// ELF: Relocations [
-// ELF-NEXT: Section (2) .rela.text {
+// ELF-NEXT: Section {{.*}} .rela.text {
// ELF-NEXT: 0x0 R_X86_64_32 foo 0x4
// ELF-NEXT: }
// ELF-NEXT: ]
diff --git a/test/MC/X86/stackmap-nops.ll b/test/MC/X86/stackmap-nops.ll
index a0d4418..33ef862 100644
--- a/test/MC/X86/stackmap-nops.ll
+++ b/test/MC/X86/stackmap-nops.ll
@@ -25,26 +25,26 @@ entry:
; CHECK: 7c: 5d
; CHECK: 7d: c3
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 1)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 2, i32 2)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 3)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 4)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 5, i32 5)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 6, i32 6)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 7, i32 7)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 8, i32 8)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 9, i32 9)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 10, i32 10)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 11, i32 11)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 12)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 13, i32 13)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 14)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 15)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 1, i32 1)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 2, i32 2)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 3)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 4)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 5, i32 5)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 6, i32 6)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 7, i32 7)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 8, i32 8)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 9, i32 9)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 10, i32 10)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 11, i32 11)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 12)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 13, i32 13)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 14)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 15)
; Add an extra stackmap with a zero-length shadow to thwart the shadow
; optimization. This will force all 15 bytes of the previous shadow to be
; padded with nops.
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 0)
ret void
}
diff --git a/test/MC/X86/x86-16.s b/test/MC/X86/x86-16.s
index 1f87c81..9789cd2 100644
--- a/test/MC/X86/x86-16.s
+++ b/test/MC/X86/x86-16.s
@@ -30,7 +30,7 @@
// CHECK: movl %eax, -16(%ebp) # encoding: [0x67,0x66,0x89,0x45,0xf0]
movl %eax, -16(%ebp)
-// CHECK: testb %bl, %cl # encoding: [0x84,0xcb]
+// CHECK: testb %bl, %cl # encoding: [0x84,0xd9]
testb %bl, %cl
// CHECK: cmpl %eax, %ebx # encoding: [0x66,0x39,0xc3]
diff --git a/test/MC/X86/x86-32.s b/test/MC/X86/x86-32.s
index 648eb5a..56fd658 100644
--- a/test/MC/X86/x86-32.s
+++ b/test/MC/X86/x86-32.s
@@ -79,7 +79,7 @@
// CHECK: movl %eax, -16(%ebp) # encoding: [0x89,0x45,0xf0]
movl %eax, -16(%ebp)
-// CHECK: testb %bl, %cl # encoding: [0x84,0xcb]
+// CHECK: testb %bl, %cl # encoding: [0x84,0xd9]
testb %bl, %cl
// CHECK: cmpl %eax, %ebx # encoding: [0x39,0xc3]
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index 10d420a..096e900 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -613,7 +613,7 @@ movq _foo@GOTPCREL(%rip), %r14
movq 0x00(%r13,%rax,8),%r13
// CHECK: testq %rax, %rbx
-// CHECK: encoding: [0x48,0x85,0xd8]
+// CHECK: encoding: [0x48,0x85,0xc3]
testq %rax, %rbx
// CHECK: cmpq %rbx, %r14
diff --git a/test/Other/lint.ll b/test/Other/lint.ll
index 20d7ff9..8a93fa1 100644
--- a/test/Other/lint.ll
+++ b/test/Other/lint.ll
@@ -63,7 +63,7 @@ define i32 @foo() noreturn {
; CHECK: Load from block address
%lb = load i32, i32* bitcast (i8* blockaddress(@foo, %next) to i32*)
; CHECK: Call to block address
- call void()* bitcast (i8* blockaddress(@foo, %next) to void()*)()
+ call void() bitcast (i8* blockaddress(@foo, %next) to void()*)()
; CHECK: Undefined behavior: Null pointer dereference
call void @llvm.stackrestore(i8* null)
; CHECK: Undefined behavior: Null pointer dereference
@@ -71,11 +71,11 @@ define i32 @foo() noreturn {
; CHECK: Unusual: noalias argument aliases another argument
call void @has_noaliases(i32* @CG, i32* @CG)
; CHECK: Call argument count mismatches callee argument count
- call void (i32, i32)* bitcast (void (i32)* @one_arg to void (i32, i32)*)(i32 0, i32 0)
+ call void (i32, i32) bitcast (void (i32)* @one_arg to void (i32, i32)*)(i32 0, i32 0)
; CHECK: Call argument count mismatches callee argument count
- call void ()* bitcast (void (i32)* @one_arg to void ()*)()
+ call void () bitcast (void (i32)* @one_arg to void ()*)()
; CHECK: Call argument type mismatches callee parameter type
- call void (float)* bitcast (void (i32)* @one_arg to void (float)*)(float 0.0)
+ call void (float) bitcast (void (i32)* @one_arg to void (float)*)(float 0.0)
; CHECK: Write to read-only memory
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i32 1, i1 0)
diff --git a/test/TableGen/AsmPredicateCondsEmission.td b/test/TableGen/AsmPredicateCondsEmission.td
new file mode 100644
index 0000000..ba5898f
--- /dev/null
+++ b/test/TableGen/AsmPredicateCondsEmission.td
@@ -0,0 +1,31 @@
+// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+
+// Check that we don't generate invalid code of the form "( && Cond2)" when
+// emitting AssemblerPredicate conditions. In the example below, the invalid
+// code would be: "return ( && (Bits & arch::AssemblerCondition2));".
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+ let InstructionSet = archInstrInfo;
+}
+
+def Pred1 : Predicate<"Condition1">;
+def Pred2 : Predicate<"Condition2">,
+ AssemblerPredicate<"AssemblerCondition2">;
+
+def foo : Instruction {
+ let Size = 2;
+ let OutOperandList = (outs);
+ let InOperandList = (ins);
+ field bits<16> Inst;
+ let Inst = 0xAAAA;
+ let AsmString = "foo";
+ field bits<16> SoftFail = 0;
+ // This is the important bit:
+ let Predicates = [Pred1, Pred2];
+}
+
+// CHECK: return ((Bits & arch::AssemblerCondition2));
diff --git a/test/Transforms/ArgumentPromotion/variadic.ll b/test/Transforms/ArgumentPromotion/variadic.ll
index 0ae52b3..0e03882 100644
--- a/test/Transforms/ArgumentPromotion/variadic.ll
+++ b/test/Transforms/ArgumentPromotion/variadic.ll
@@ -15,7 +15,7 @@ target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nounwind uwtable
define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
entry:
- tail call void (i8*, i8*, i8*, i8*, i8*, ...)* @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
+ tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
ret i32 0
}
diff --git a/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll b/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll
new file mode 100644
index 0000000..1e12c01
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll
@@ -0,0 +1,18 @@
+; RUN: opt -S -disable-simplify-libcalls -codegenprepare < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; This is a workaround for PR23093: when building with -mkernel/-fno-builtin,
+; we still generate fortified library calls.
+
+; Check that we ignore two things:
+; - attribute nobuiltin
+; - TLI::has (always returns false thanks to -disable-simplify-libcalls)
+
+; CHECK-NOT: _chk
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %len, i32 1, i1 false)
+define void @test_nobuiltin(i8* %dst, i64 %len) {
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 %len, i64 -1) nobuiltin
+ ret void
+}
+
+declare i8* @__memset_chk(i8*, i32, i64, i64)
diff --git a/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll b/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
new file mode 100644
index 0000000..1e41319
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
@@ -0,0 +1,74 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
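+; The add-then-unsigned-compare pattern below ((a + b) < a, or an equivalent
+; swapped form) holds exactly when the addition wraps, which is why
+; CodeGenPrepare can rewrite it to @llvm.uadd.with.overflow.
+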
+; CHECK-LABEL: @test1(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test1(i64 %a, i64 %b) nounwind ssp {
+entry:
+ %add = add i64 %b, %a
+ %cmp = icmp ult i64 %add, %a
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+}
+
+; CHECK-LABEL: @test2(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test2(i64 %a, i64 %b) nounwind ssp {
+entry:
+ %add = add i64 %b, %a
+ %cmp = icmp ult i64 %add, %b
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+}
+
+; CHECK-LABEL: @test3(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test3(i64 %a, i64 %b) nounwind ssp {
+entry:
+ %add = add i64 %b, %a
+ %cmp = icmp ugt i64 %b, %add
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+}
+
+; CHECK-LABEL: @test4(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: extractvalue
+; CHECK: extractvalue
+; CHECK: select
+define i64 @test4(i64 %a, i64 %b, i1 %c) nounwind ssp {
+entry:
+ %add = add i64 %b, %a
+ %cmp = icmp ugt i64 %b, %add
+ br i1 %c, label %next, label %exit
+
+ next:
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+
+ exit:
+ ret i64 0
+}
+
+; CHECK-LABEL: @test5(
+; CHECK-NOT: llvm.uadd.with.overflow
+; CHECK: next
+define i64 @test5(i64 %a, i64 %b, i64* %ptr, i1 %c) nounwind ssp {
+entry:
+ %add = add i64 %b, %a
+ store i64 %add, i64* %ptr
+ %cmp = icmp ugt i64 %b, %add
+ br i1 %c, label %next, label %exit
+
+ next:
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+
+ exit:
+ ret i64 0
+}
diff --git a/test/Transforms/CodeGenPrepare/statepoint-relocate.ll b/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
index 7aa526f..1baf4ab 100644
--- a/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
+++ b/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
@@ -5,19 +5,19 @@ target triple = "x86_64-pc-linux-gnu"
declare zeroext i1 @return_i1()
-define i32 @test_sor_basic(i32* %base) {
+define i32 @test_sor_basic(i32* %base) gc "statepoint-example" {
; CHECK: getelementptr i32, i32* %base, i32 15
; CHECK: getelementptr i32, i32* %base-new, i32 15
entry:
%ptr = getelementptr i32, i32* %base, i32 15
- %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+ %tok = call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
%base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
-define i32 @test_sor_two_derived(i32* %base) {
+define i32 @test_sor_two_derived(i32* %base) gc "statepoint-example" {
; CHECK: getelementptr i32, i32* %base, i32 15
; CHECK: getelementptr i32, i32* %base, i32 12
; CHECK: getelementptr i32, i32* %base-new, i32 15
@@ -25,7 +25,7 @@ define i32 @test_sor_two_derived(i32* %base) {
entry:
%ptr = getelementptr i32, i32* %base, i32 15
%ptr2 = getelementptr i32, i32* %base, i32 12
- %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
+ %tok = call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
%base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
%ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
@@ -33,50 +33,50 @@ entry:
ret i32 %ret
}
-define i32 @test_sor_ooo(i32* %base) {
+define i32 @test_sor_ooo(i32* %base) gc "statepoint-example" {
; CHECK: getelementptr i32, i32* %base, i32 15
; CHECK: getelementptr i32, i32* %base-new, i32 15
entry:
%ptr = getelementptr i32, i32* %base, i32 15
- %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+ %tok = call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
%base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
-define i32 @test_sor_gep_smallint([3 x i32]* %base) {
+define i32 @test_sor_gep_smallint([3 x i32]* %base) gc "statepoint-example" {
; CHECK: getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 2
; CHECK: getelementptr [3 x i32], [3 x i32]* %base-new, i32 0, i32 2
entry:
%ptr = getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 2
- %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
+ %tok = call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
%base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 4, i32 4)
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
-define i32 @test_sor_gep_largeint([3 x i32]* %base) {
+define i32 @test_sor_gep_largeint([3 x i32]* %base) gc "statepoint-example" {
; CHECK: getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 21
; CHECK-NOT: getelementptr [3 x i32], [3 x i32]* %base-new, i32 0, i32 21
entry:
%ptr = getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 21
- %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
+ %tok = call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
%base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 4, i32 4)
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
-define i32 @test_sor_noop(i32* %base) {
+define i32 @test_sor_noop(i32* %base) gc "statepoint-example" {
; CHECK: getelementptr i32, i32* %base, i32 15
; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
entry:
%ptr = getelementptr i32, i32* %base, i32 15
%ptr2 = getelementptr i32, i32* %base, i32 12
- %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
+ %tok = call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
%ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
%ret = load i32, i32* %ptr-new
diff --git a/test/Transforms/ConstantHoisting/X86/stackmap.ll b/test/Transforms/ConstantHoisting/X86/stackmap.ll
index 9df4417..b9aee6b 100644
--- a/test/Transforms/ConstantHoisting/X86/stackmap.ll
+++ b/test/Transforms/ConstantHoisting/X86/stackmap.ll
@@ -7,10 +7,10 @@ target triple = "x86_64-apple-macosx10.9.0"
define i128 @test1(i128 %a) {
; CHECK-LABEL: @test1
; CHECK: %const = bitcast i128 134646182756734033220 to i128
-; CHECK: tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 %const)
+; CHECK: tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 1, i32 24, i128 %const)
entry:
%0 = add i128 %a, 134646182756734033220
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 24, i128 134646182756734033220)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 1, i32 24, i128 134646182756734033220)
ret i128 %0
}
diff --git a/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll b/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll
index cdd893f..9bbc275 100644
--- a/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll
+++ b/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll
@@ -6,7 +6,7 @@ define internal i32 @test(i32 %A, ...) {
}
define i32 @foo() {
- %A = call i32(i32, ...)* @test(i32 1)
+ %A = call i32(i32, ...) @test(i32 1)
ret i32 %A
}
diff --git a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
index f049265..ab378e9 100644
--- a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
+++ b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
@@ -13,7 +13,7 @@ define internal zeroext i8 @foo(i8* inreg %p, i8 signext %y, ... ) nounwind {
define i32 @bar() {
; CHECK: call void @foo(i8 signext 1) [[NUW]]
- %A = call zeroext i8(i8*, i8, ...)* @foo(i8* inreg null, i8 signext 1, %struct* byval null ) nounwind
+ %A = call zeroext i8(i8*, i8, ...) @foo(i8* inreg null, i8 signext 1, %struct* byval null ) nounwind
ret i32 0
}
diff --git a/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll b/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll
index a7cfe68..48e4396 100644
--- a/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll
+++ b/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll
@@ -23,7 +23,7 @@ define i32 @main() {
entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%tmp = getelementptr [4 x %struct.point], [4 x %struct.point]* @pts, i32 0, i32 0 ; <%struct.point*> [#uses=1]
- %tmp1 = call i32 (i32, ...)* @va1( i32 1, %struct.point* byval %tmp ) nounwind ; <i32> [#uses=0]
+ %tmp1 = call i32 (i32, ...) @va1( i32 1, %struct.point* byval %tmp ) nounwind ; <i32> [#uses=0]
call void @exit( i32 0 ) noreturn nounwind
unreachable
}
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index 730a3f3..b834a75 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -4,10 +4,10 @@
define i8* @vfs_addname(i8* %name, i32 %len, i32 %hash, i32 %flags) nounwind ssp {
entry:
- call void @llvm.dbg.value(metadata i8* %name, i64 0, metadata !0, metadata !MDExpression())
- call void @llvm.dbg.value(metadata i32 %len, i64 0, metadata !10, metadata !MDExpression())
- call void @llvm.dbg.value(metadata i32 %hash, i64 0, metadata !11, metadata !MDExpression())
- call void @llvm.dbg.value(metadata i32 %flags, i64 0, metadata !12, metadata !MDExpression())
+ call void @llvm.dbg.value(metadata i8* %name, i64 0, metadata !0, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
+ call void @llvm.dbg.value(metadata i32 %len, i64 0, metadata !10, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
+ call void @llvm.dbg.value(metadata i32 %hash, i64 0, metadata !11, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
+ call void @llvm.dbg.value(metadata i32 %flags, i64 0, metadata !12, metadata !MDExpression()), !dbg !MDLocation(scope: !1)
; CHECK: call fastcc i8* @add_name_internal(i8* %name, i32 %hash) [[NUW:#[0-9]+]], !dbg !{{[0-9]+}}
%0 = call fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext 0, i32 %flags) nounwind, !dbg !13 ; <i8*> [#uses=1]
ret i8* %0, !dbg !13
@@ -17,11 +17,11 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) noinline nounwind ssp {
entry:
- call void @llvm.dbg.value(metadata i8* %name, i64 0, metadata !15, metadata !MDExpression())
- call void @llvm.dbg.value(metadata i32 %len, i64 0, metadata !20, metadata !MDExpression())
- call void @llvm.dbg.value(metadata i32 %hash, i64 0, metadata !21, metadata !MDExpression())
- call void @llvm.dbg.value(metadata i8 %extra, i64 0, metadata !22, metadata !MDExpression())
- call void @llvm.dbg.value(metadata i32 %flags, i64 0, metadata !23, metadata !MDExpression())
+ call void @llvm.dbg.value(metadata i8* %name, i64 0, metadata !15, metadata !MDExpression()), !dbg !MDLocation(scope: !16)
+ call void @llvm.dbg.value(metadata i32 %len, i64 0, metadata !20, metadata !MDExpression()), !dbg !MDLocation(scope: !16)
+ call void @llvm.dbg.value(metadata i32 %hash, i64 0, metadata !21, metadata !MDExpression()), !dbg !MDLocation(scope: !16)
+ call void @llvm.dbg.value(metadata i8 %extra, i64 0, metadata !22, metadata !MDExpression()), !dbg !MDLocation(scope: !16)
+ call void @llvm.dbg.value(metadata i32 %flags, i64 0, metadata !23, metadata !MDExpression()), !dbg !MDLocation(scope: !16)
%0 = icmp eq i32 %hash, 0, !dbg !24 ; <i1> [#uses=1]
br i1 %0, label %bb, label %bb1, !dbg !24
@@ -74,5 +74,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!26 = !MDLocation(line: 24, scope: !25)
!27 = !MDLocation(line: 26, scope: !25)
!28 = !MDFile(filename: "tail.c", directory: "/Users/echeng/LLVM/radars/r7927803/")
-!29 = !{i32 0}
+!29 = !{}
!30 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/DeadArgElim/2013-05-17-VarargsAndBlockAddress.ll b/test/Transforms/DeadArgElim/2013-05-17-VarargsAndBlockAddress.ll
index 2321603..7552a12 100644
--- a/test/Transforms/DeadArgElim/2013-05-17-VarargsAndBlockAddress.ll
+++ b/test/Transforms/DeadArgElim/2013-05-17-VarargsAndBlockAddress.ll
@@ -19,7 +19,7 @@ l2:
; CHECK: define internal i32 @varargs_func(i8* %addr) {
define i32 @caller(i8* %addr) {
- %r = call i32 (i8*, ...)* @varargs_func(i8* %addr)
+ %r = call i32 (i8*, ...) @varargs_func(i8* %addr)
ret i32 %r
}
; CHECK: %r = call i32 @varargs_func(i8* %addr)
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index 462ae91..95e18e1 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -22,7 +22,7 @@
; Function Attrs: uwtable
define void @_Z2f2v() #0 {
entry:
- call void (i32, ...)* @_ZL2f1iz(i32 1), !dbg !15
+ call void (i32, ...) @_ZL2f1iz(i32 1), !dbg !15
ret void, !dbg !16
}
diff --git a/test/Transforms/DeadArgElim/dead_vaargs.ll b/test/Transforms/DeadArgElim/dead_vaargs.ll
index c8189c6..3754159 100644
--- a/test/Transforms/DeadArgElim/dead_vaargs.ll
+++ b/test/Transforms/DeadArgElim/dead_vaargs.ll
@@ -1,25 +1,25 @@
; RUN: opt < %s -deadargelim -S | FileCheck %s
define i32 @bar(i32 %A) {
- call void (i32, ...)* @thunk(i32 %A, i64 47, double 1.000000e+00)
- %a = call i32 (i32, ...)* @has_vastart(i32 %A, i64 47, double 1.000000e+00)
- %b = call i32 (i32, ...)* @no_vastart( i32 %A, i32 %A, i32 %A, i32 %A, i64 47, double 1.000000e+00 )
+ call void (i32, ...) @thunk(i32 %A, i64 47, double 1.000000e+00)
+ %a = call i32 (i32, ...) @has_vastart(i32 %A, i64 47, double 1.000000e+00)
+ %b = call i32 (i32, ...) @no_vastart( i32 %A, i32 %A, i32 %A, i32 %A, i64 47, double 1.000000e+00 )
%c = add i32 %a, %b
ret i32 %c
}
; CHECK-LABEL: define i32 @bar
-; CHECK: call void (i32, ...)* @thunk(i32 %A, i64 47, double 1.000000e+00)
-; CHECK: call i32 (i32, ...)* @has_vastart(i32 %A, i64 47, double 1.000000e+00)
+; CHECK: call void (i32, ...) @thunk(i32 %A, i64 47, double 1.000000e+00)
+; CHECK: call i32 (i32, ...) @has_vastart(i32 %A, i64 47, double 1.000000e+00)
; CHECK: call i32 @no_vastart(i32 %A)
declare void @thunk_target(i32 %X, ...)
define internal void @thunk(i32 %X, ...) {
- musttail call void(i32, ...)* @thunk_target(i32 %X, ...)
+ musttail call void(i32, ...) @thunk_target(i32 %X, ...)
ret void
}
; CHECK-LABEL: define internal void @thunk(i32 %X, ...)
-; CHECK: musttail call void (i32, ...)* @thunk_target(i32 %X, ...)
+; CHECK: musttail call void (i32, ...) @thunk_target(i32 %X, ...)
define internal i32 @has_vastart(i32 %X, ...) {
%valist = alloca i8
diff --git a/test/Transforms/DeadArgElim/variadic_safety.ll b/test/Transforms/DeadArgElim/variadic_safety.ll
index 15f57bc..2dac2f9 100644
--- a/test/Transforms/DeadArgElim/variadic_safety.ll
+++ b/test/Transforms/DeadArgElim/variadic_safety.ll
@@ -17,9 +17,9 @@ define internal i32 @va_func(i32 %a, i32 %b, ...) {
define i32 @call_va(i32 %in) {
%stacked = alloca i32
store i32 42, i32* %stacked
- %res = call i32(i32, i32, ...)* @va_func(i32 %in, i32 %in, [6 x i32] undef, i32* byval %stacked)
+ %res = call i32(i32, i32, ...) @va_func(i32 %in, i32 %in, [6 x i32] undef, i32* byval %stacked)
ret i32 %res
-; CHECK: call i32 (i32, i32, ...)* @va_func(i32 undef, i32 %in, [6 x i32] undef, i32* byval %stacked)
+; CHECK: call i32 (i32, i32, ...) @va_func(i32 undef, i32 %in, [6 x i32] undef, i32* byval %stacked)
}
define internal i32 @va_deadret_func(i32 %a, i32 %b, ...) {
@@ -32,7 +32,7 @@ define internal i32 @va_deadret_func(i32 %a, i32 %b, ...) {
define void @call_deadret(i32 %in) {
%stacked = alloca i32
store i32 42, i32* %stacked
- call i32 (i32, i32, ...)* @va_deadret_func(i32 undef, i32 %in, [6 x i32] undef, i32* byval %stacked)
+ call i32 (i32, i32, ...) @va_deadret_func(i32 undef, i32 %in, [6 x i32] undef, i32* byval %stacked)
ret void
-; CHECK: call void (i32, i32, ...)* @va_deadret_func(i32 undef, i32 undef, [6 x i32] undef, i32* byval %stacked)
+; CHECK: call void (i32, i32, ...) @va_deadret_func(i32 undef, i32 undef, [6 x i32] undef, i32* byval %stacked)
}
diff --git a/test/Transforms/DeadStoreElimination/inst-limits.ll b/test/Transforms/DeadStoreElimination/inst-limits.ll
index a7b0aeb..87de2ae 100644
--- a/test/Transforms/DeadStoreElimination/inst-limits.ll
+++ b/test/Transforms/DeadStoreElimination/inst-limits.ll
@@ -118,7 +118,7 @@ entry:
; Insert a meaningless dbg.value intrinsic; it should have no
; effect on the working of DSE in any way.
- call void @llvm.dbg.value(metadata i32* undef, i64 0, metadata !10, metadata !MDExpression())
+ call void @llvm.dbg.value(metadata i32* undef, i64 0, metadata !10, metadata !MDExpression()), !dbg !MDLocation(scope: !4)
; CHECK: store i32 -1, i32* @x, align 4
store i32 -1, i32* @x, align 4
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index dd1443e..2ffe053 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -3,7 +3,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
+declare void @llvm.init.trampoline(i8*, i8*, i8*)
define void @test1(i32* %Q, i32* %P) {
%DEAD = load i32, i32* %Q
@@ -132,7 +132,7 @@ define void @test11() {
%storage = alloca [10 x i8], align 16 ; <[10 x i8]*> [#uses=1]
; CHECK-NOT: alloca
%cast = getelementptr [10 x i8], [10 x i8]* %storage, i32 0, i32 0 ; <i8*> [#uses=1]
- %tramp = call i8* @llvm.init.trampoline( i8* %cast, i8* bitcast (void ()* @test11f to i8*), i8* null ) ; <i8*> [#uses=1]
+ call void @llvm.init.trampoline( i8* %cast, i8* bitcast (void ()* @test11f to i8*), i8* null ) ; <i8*> [#uses=1]
; CHECK-NOT: trampoline
ret void
; CHECK: ret void
diff --git a/test/Transforms/Float2Int/basic.ll b/test/Transforms/Float2Int/basic.ll
new file mode 100644
index 0000000..f4d9469
--- /dev/null
+++ b/test/Transforms/Float2Int/basic.ll
@@ -0,0 +1,256 @@
+; RUN: opt < %s -float2int -S | FileCheck %s
+
+;
+; Positive tests
+;
+
+; CHECK-LABEL: @simple1
+; CHECK: %1 = zext i8 %a to i32
+; CHECK: %2 = add i32 %1, 1
+; CHECK: %3 = trunc i32 %2 to i16
+; CHECK: ret i16 %3
+define i16 @simple1(i8 %a) {
+ %1 = uitofp i8 %a to float
+ %2 = fadd float %1, 1.0
+ %3 = fptoui float %2 to i16
+ ret i16 %3
+}
+
+; CHECK-LABEL: @simple2
+; CHECK: %1 = zext i8 %a to i32
+; CHECK: %2 = sub i32 %1, 1
+; CHECK: %3 = trunc i32 %2 to i8
+; CHECK: ret i8 %3
+define i8 @simple2(i8 %a) {
+ %1 = uitofp i8 %a to float
+ %2 = fsub float %1, 1.0
+ %3 = fptoui float %2 to i8
+ ret i8 %3
+}
+
+; CHECK-LABEL: @simple3
+; CHECK: %1 = zext i8 %a to i32
+; CHECK: %2 = sub i32 %1, 1
+; CHECK: ret i32 %2
+define i32 @simple3(i8 %a) {
+ %1 = uitofp i8 %a to float
+ %2 = fsub float %1, 1.0
+ %3 = fptoui float %2 to i32
+ ret i32 %3
+}
+
+; CHECK-LABEL: @cmp
+; CHECK: %1 = zext i8 %a to i32
+; CHECK: %2 = zext i8 %b to i32
+; CHECK: %3 = icmp slt i32 %1, %2
+; CHECK: ret i1 %3
+define i1 @cmp(i8 %a, i8 %b) {
+ %1 = uitofp i8 %a to float
+ %2 = uitofp i8 %b to float
+ %3 = fcmp ult float %1, %2
+ ret i1 %3
+}
+
+; CHECK-LABEL: @simple4
+; CHECK: %1 = zext i32 %a to i64
+; CHECK: %2 = add i64 %1, 1
+; CHECK: %3 = trunc i64 %2 to i32
+; CHECK: ret i32 %3
+define i32 @simple4(i32 %a) {
+ %1 = uitofp i32 %a to double
+ %2 = fadd double %1, 1.0
+ %3 = fptoui double %2 to i32
+ ret i32 %3
+}
+
+; CHECK-LABEL: @simple5
+; CHECK: %1 = zext i8 %a to i32
+; CHECK: %2 = zext i8 %b to i32
+; CHECK: %3 = add i32 %1, 1
+; CHECK: %4 = mul i32 %3, %2
+; CHECK: ret i32 %4
+define i32 @simple5(i8 %a, i8 %b) {
+ %1 = uitofp i8 %a to float
+ %2 = uitofp i8 %b to float
+ %3 = fadd float %1, 1.0
+ %4 = fmul float %3, %2
+ %5 = fptoui float %4 to i32
+ ret i32 %5
+}
+
+; The two chains don't interact - failure of one shouldn't
+; cause failure of the other.
+
+; CHECK-LABEL: @multi1
+; CHECK: %1 = zext i8 %a to i32
+; CHECK: %2 = zext i8 %b to i32
+; CHECK: %fc = uitofp i8 %c to float
+; CHECK: %x1 = add i32 %1, %2
+; CHECK: %z = fadd float %fc, %d
+; CHECK: %w = fptoui float %z to i32
+; CHECK: %r = add i32 %x1, %w
+; CHECK: ret i32 %r
+define i32 @multi1(i8 %a, i8 %b, i8 %c, float %d) {
+ %fa = uitofp i8 %a to float
+ %fb = uitofp i8 %b to float
+ %fc = uitofp i8 %c to float
+ %x = fadd float %fa, %fb
+ %y = fptoui float %x to i32
+ %z = fadd float %fc, %d
+ %w = fptoui float %z to i32
+ %r = add i32 %y, %w
+ ret i32 %r
+}
+
+; CHECK-LABEL: @simple_negzero
+; CHECK: %1 = zext i8 %a to i32
+; CHECK: %2 = add i32 %1, 0
+; CHECK: %3 = trunc i32 %2 to i16
+; CHECK: ret i16 %3
+define i16 @simple_negzero(i8 %a) {
+ %1 = uitofp i8 %a to float
+ %2 = fadd fast float %1, -0.0
+ %3 = fptoui float %2 to i16
+ ret i16 %3
+}
+
+; CHECK-LABEL: @simple_negative
+; CHECK: %1 = sext i8 %call to i32
+; CHECK: %mul1 = mul i32 %1, -3
+; CHECK: %2 = trunc i32 %mul1 to i8
+; CHECK: %conv3 = sext i8 %2 to i32
+; CHECK: ret i32 %conv3
+define i32 @simple_negative(i8 %call) {
+ %conv1 = sitofp i8 %call to float
+ %mul = fmul float %conv1, -3.000000e+00
+ %conv2 = fptosi float %mul to i8
+ %conv3 = sext i8 %conv2 to i32
+ ret i32 %conv3
+}
+
+;
+; Negative tests
+;
+
+; CHECK-LABEL: @neg_multi1
+; CHECK: %fa = uitofp i8 %a to float
+; CHECK: %fc = uitofp i8 %c to float
+; CHECK: %x = fadd float %fa, %fc
+; CHECK: %y = fptoui float %x to i32
+; CHECK: %z = fadd float %fc, %d
+; CHECK: %w = fptoui float %z to i32
+; CHECK: %r = add i32 %y, %w
+; CHECK: ret i32 %r
+; The two chains intersect, so if one of them fails, no
+; transform can occur.
+define i32 @neg_multi1(i8 %a, i8 %b, i8 %c, float %d) {
+ %fa = uitofp i8 %a to float
+ %fc = uitofp i8 %c to float
+ %x = fadd float %fa, %fc
+ %y = fptoui float %x to i32
+ %z = fadd float %fc, %d
+ %w = fptoui float %z to i32
+ %r = add i32 %y, %w
+ ret i32 %r
+}
+
+; CHECK-LABEL: @neg_muld
+; CHECK: %fa = uitofp i32 %a to double
+; CHECK: %fb = uitofp i32 %b to double
+; CHECK: %mul = fmul double %fa, %fb
+; CHECK: %r = fptoui double %mul to i64
+; CHECK: ret i64 %r
+; An i32 * i32 product needs a full 64 bits, which is more than the 52 bits
+; that can be exactly represented in a double.
+define i64 @neg_muld(i32 %a, i32 %b) {
+ %fa = uitofp i32 %a to double
+ %fb = uitofp i32 %b to double
+ %mul = fmul double %fa, %fb
+ %r = fptoui double %mul to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: @neg_mulf
+; CHECK: %fa = uitofp i16 %a to float
+; CHECK: %fb = uitofp i16 %b to float
+; CHECK: %mul = fmul float %fa, %fb
+; CHECK: %r = fptoui float %mul to i32
+; CHECK: ret i32 %r
+; An i16 * i16 product is an i32, which can't be represented exactly in a float but can in a
+; double. This should fail, as the written code uses floats, not doubles, so
+; the original result may be inaccurate.
+define i32 @neg_mulf(i16 %a, i16 %b) {
+ %fa = uitofp i16 %a to float
+ %fb = uitofp i16 %b to float
+ %mul = fmul float %fa, %fb
+ %r = fptoui float %mul to i32
+ ret i32 %r
+}
+
+; CHECK-LABEL: @neg_cmp
+; CHECK: %1 = uitofp i8 %a to float
+; CHECK: %2 = uitofp i8 %b to float
+; CHECK: %3 = fcmp false float %1, %2
+; CHECK: ret i1 %3
+; "false" doesn't have an icmp equivalent.
+define i1 @neg_cmp(i8 %a, i8 %b) {
+ %1 = uitofp i8 %a to float
+ %2 = uitofp i8 %b to float
+ %3 = fcmp false float %1, %2
+ ret i1 %3
+}
+
+; CHECK-LABEL: @neg_div
+; CHECK: %1 = uitofp i8 %a to float
+; CHECK: %2 = fdiv float %1, 1.0
+; CHECK: %3 = fptoui float %2 to i16
+; CHECK: ret i16 %3
+; Division isn't a supported operator.
+define i16 @neg_div(i8 %a) {
+ %1 = uitofp i8 %a to float
+ %2 = fdiv float %1, 1.0
+ %3 = fptoui float %2 to i16
+ ret i16 %3
+}
+
+; CHECK-LABEL: @neg_remainder
+; CHECK: %1 = uitofp i8 %a to float
+; CHECK: %2 = fadd float %1, 1.2
+; CHECK: %3 = fptoui float %2 to i16
+; CHECK: ret i16 %3
+; 1.25 is not an integer.
+define i16 @neg_remainder(i8 %a) {
+ %1 = uitofp i8 %a to float
+ %2 = fadd float %1, 1.25
+ %3 = fptoui float %2 to i16
+ ret i16 %3
+}
+
+; CHECK-LABEL: @neg_toolarge
+; CHECK: %1 = uitofp i80 %a to fp128
+; CHECK: %2 = fadd fp128 %1, %1
+; CHECK: %3 = fptoui fp128 %2 to i80
+; CHECK: ret i80 %3
+; i80 > i64, which is the largest bitwidth handleable by default.
+define i80 @neg_toolarge(i80 %a) {
+ %1 = uitofp i80 %a to fp128
+ %2 = fadd fp128 %1, %1
+ %3 = fptoui fp128 %2 to i80
+ ret i80 %3
+}
+
+; CHECK-LABEL: @neg_calluser
+; CHECK: sitofp
+; CHECK: fcmp
+; The sequence %1..%3 cannot be converted because %4 uses %2.
+define i32 @neg_calluser(i32 %value) {
+ %1 = sitofp i32 %value to double
+ %2 = fadd double %1, 1.0
+ %3 = fcmp olt double %2, 0.000000e+00
+ %4 = tail call double @g(double %2)
+ %5 = fptosi double %4 to i32
+ %6 = zext i1 %3 to i32
+ %7 = add i32 %6, %5
+ ret i32 %7
+}
+declare double @g(double)
diff --git a/test/Transforms/Float2Int/float2int-optnone.ll b/test/Transforms/Float2Int/float2int-optnone.ll
new file mode 100644
index 0000000..c1eeea7
--- /dev/null
+++ b/test/Transforms/Float2Int/float2int-optnone.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -float2int -S | FileCheck %s
+;
+; Verify that the float2int pass is not run on optnone functions.
+
+define i16 @simple1(i8 %a) #0 {
+; CHECK-LABEL: @simple1
+; CHECK: %1 = uitofp i8 %a to float
+; CHECK-NEXT: %2 = fadd float %1, 1.0
+; CHECK-NEXT: %3 = fptoui float %2 to i16
+; CHECK-NEXT: ret i16 %3
+ %1 = uitofp i8 %a to float
+ %2 = fadd float %1, 1.0
+ %3 = fptoui float %2 to i16
+ ret i16 %3
+}
+
+attributes #0 = { noinline optnone }
diff --git a/test/Transforms/Float2Int/toolarge.ll b/test/Transforms/Float2Int/toolarge.ll
new file mode 100644
index 0000000..b5d7781
--- /dev/null
+++ b/test/Transforms/Float2Int/toolarge.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -float2int -float2int-max-integer-bw=256 -S | FileCheck %s
+
+; CHECK-LABEL: @neg_toolarge
+; CHECK: %1 = uitofp i80 %a to fp128
+; CHECK: %2 = fadd fp128 %1, %1
+; CHECK: %3 = fptoui fp128 %2 to i80
+; CHECK: ret i80 %3
+; fp128 has a 112-bit mantissa, which can hold an i80. But we only support
+; up to i64, so it should fail (even though the max integer bitwidth is 256).
+define i80 @neg_toolarge(i80 %a) {
+ %1 = uitofp i80 %a to fp128
+ %2 = fadd fp128 %1, %1
+ %3 = fptoui fp128 %2 to i80
+ ret i80 %3
+}
+
diff --git a/test/Transforms/FunctionAttrs/nocapture.ll b/test/Transforms/FunctionAttrs/nocapture.ll
index 23cbc85..55a3dc4 100644
--- a/test/Transforms/FunctionAttrs/nocapture.ll
+++ b/test/Transforms/FunctionAttrs/nocapture.ll
@@ -188,7 +188,7 @@ declare void @test6_1(i8* %x6_1, i8* nocapture %y6_1, ...)
; CHECK: define void @test6_2(i8* %x6_2, i8* nocapture %y6_2, i8* %z6_2)
define void @test6_2(i8* %x6_2, i8* %y6_2, i8* %z6_2) {
- call void (i8*, i8*, ...)* @test6_1(i8* %x6_2, i8* %y6_2, i8* %z6_2)
+ call void (i8*, i8*, ...) @test6_1(i8* %x6_2, i8* %y6_2, i8* %z6_2)
store i32* null, i32** @g
ret void
}
diff --git a/test/Transforms/FunctionAttrs/readattrs.ll b/test/Transforms/FunctionAttrs/readattrs.ll
index b4e904c..7f22e6f 100644
--- a/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/test/Transforms/FunctionAttrs/readattrs.ll
@@ -5,7 +5,7 @@ declare void @test1_1(i8* %x1_1, i8* readonly %y1_1, ...)
; CHECK: define void @test1_2(i8* %x1_2, i8* readonly %y1_2, i8* %z1_2)
define void @test1_2(i8* %x1_2, i8* %y1_2, i8* %z1_2) {
- call void (i8*, i8*, ...)* @test1_1(i8* %x1_2, i8* %y1_2, i8* %z1_2)
+ call void (i8*, i8*, ...) @test1_1(i8* %x1_2, i8* %y1_2, i8* %z1_2)
store i32 0, i32* @x
ret void
}
diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll
index 99ef08a..2244261 100644
--- a/test/Transforms/GCOVProfiling/linkagename.ll
+++ b/test/Transforms/GCOVProfiling/linkagename.ll
@@ -16,7 +16,7 @@ entry:
!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 177323)", isOptimized: false, emissionKind: 0, file: !2, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3, imports: !3)
!1 = !MDFile(filename: "hello.cc", directory: "/home/nlewycky")
!2 = !MDFile(filename: "hello.cc", directory: "/home/nlewycky")
-!3 = !{i32 0}
+!3 = !{}
!4 = !{!5}
!5 = !MDSubprogram(name: "foo", linkageName: "_Z3foov", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !1, type: !6, function: void ()* @_Z3foov, variables: !3)
!6 = !MDSubroutineType(types: !7)
diff --git a/test/Transforms/GCOVProfiling/return-block.ll b/test/Transforms/GCOVProfiling/return-block.ll
index 0fafc55..1f1933c 100644
--- a/test/Transforms/GCOVProfiling/return-block.ll
+++ b/test/Transforms/GCOVProfiling/return-block.ll
@@ -18,13 +18,13 @@ target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nounwind uwtable
define void @test() #0 {
entry:
- tail call void (...)* @f() #2, !dbg !14
+ tail call void (...) @f() #2, !dbg !14
%0 = load i32, i32* @A, align 4, !dbg !15
%tobool = icmp eq i32 %0, 0, !dbg !15
br i1 %tobool, label %if.end, label %if.then, !dbg !15
if.then: ; preds = %entry
- tail call void (...)* @g() #2, !dbg !16
+ tail call void (...) @g() #2, !dbg !16
br label %if.end, !dbg !16
if.end: ; preds = %entry, %if.then
diff --git a/test/Transforms/GVN/2007-07-31-NoDomInherit.ll b/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
index c30a283..b2e4c64 100644
--- a/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
+++ b/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
@@ -151,7 +151,7 @@ entry:
bb: ; preds = %cond_next97
%tmp1 = load i32, i32* @numi ; <i32> [#uses=1]
%tmp2 = getelementptr [44 x i8], [44 x i8]* @.str43, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp3 = call i32 (i8*, ...)* @printf( i8* %tmp2, i32 %tmp1 ) ; <i32> [#uses=0]
+ %tmp3 = call i32 (i8*, ...) @printf( i8* %tmp2, i32 %tmp1 ) ; <i32> [#uses=0]
store i32 0, i32* %i
br label %bb13
@@ -231,7 +231,7 @@ bb55: ; preds = %bb49
store i32 %tmp56, i32* %num_sol
%tmp57 = getelementptr [21 x i8], [21 x i8]* @.str44, i32 0, i32 0 ; <i8*> [#uses=1]
%tmp58 = load i32, i32* %num_sol ; <i32> [#uses=1]
- %tmp59 = call i32 (i8*, ...)* @printf( i8* %tmp57, i32 %tmp58 ) ; <i32> [#uses=0]
+ %tmp59 = call i32 (i8*, ...) @printf( i8* %tmp57, i32 %tmp58 ) ; <i32> [#uses=0]
%tmp60 = load i32, i32* @counters ; <i32> [#uses=1]
%tmp61 = icmp ne i32 %tmp60, 0 ; <i1> [#uses=1]
%tmp6162 = zext i1 %tmp61 to i32 ; <i32> [#uses=1]
@@ -241,7 +241,7 @@ bb55: ; preds = %bb49
cond_true: ; preds = %bb55
store i32 0, i32* %total
%tmp64 = getelementptr [12 x i8], [12 x i8]* @.str45, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp65 = call i32 (i8*, ...)* @printf( i8* %tmp64 ) ; <i32> [#uses=0]
+ %tmp65 = call i32 (i8*, ...) @printf( i8* %tmp64 ) ; <i32> [#uses=0]
store i32 0, i32* %i
br label %bb79
@@ -250,7 +250,7 @@ bb66: ; preds = %bb79
%tmp68 = getelementptr [5 x i32], [5 x i32]* @counter, i32 0, i32 %tmp67 ; <i32*> [#uses=1]
%tmp69 = load i32, i32* %tmp68 ; <i32> [#uses=1]
%tmp70 = getelementptr [5 x i8], [5 x i8]* @.str46, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp71 = call i32 (i8*, ...)* @printf( i8* %tmp70, i32 %tmp69 ) ; <i32> [#uses=0]
+ %tmp71 = call i32 (i8*, ...) @printf( i8* %tmp70, i32 %tmp69 ) ; <i32> [#uses=0]
%tmp72 = load i32, i32* %i ; <i32> [#uses=1]
%tmp73 = getelementptr [5 x i32], [5 x i32]* @counter, i32 0, i32 %tmp72 ; <i32*> [#uses=1]
%tmp74 = load i32, i32* %tmp73 ; <i32> [#uses=1]
@@ -273,7 +273,7 @@ bb79: ; preds = %bb66, %cond_true
bb85: ; preds = %bb79
%tmp86 = getelementptr [12 x i8], [12 x i8]* @.str47, i32 0, i32 0 ; <i8*> [#uses=1]
%tmp87 = load i32, i32* %total ; <i32> [#uses=1]
- %tmp88 = call i32 (i8*, ...)* @printf( i8* %tmp86, i32 %tmp87 ) ; <i32> [#uses=0]
+ %tmp88 = call i32 (i8*, ...) @printf( i8* %tmp86, i32 %tmp87 ) ; <i32> [#uses=0]
br label %cond_next
cond_next: ; preds = %bb85, %bb55
diff --git a/test/Transforms/GVN/2008-02-12-UndefLoad.ll b/test/Transforms/GVN/2008-02-12-UndefLoad.ll
index 8ebeb14..a1aed86 100644
--- a/test/Transforms/GVN/2008-02-12-UndefLoad.ll
+++ b/test/Transforms/GVN/2008-02-12-UndefLoad.ll
@@ -12,7 +12,7 @@ entry:
%tmp3 = or i32 %tmp2, 11 ; <i32> [#uses=1]
%tmp4 = and i32 %tmp3, -21 ; <i32> [#uses=1]
store i32 %tmp4, i32* %tmp1, align 4
- %call = call i32 (...)* @x( %struct.anon* %c ) ; <i32> [#uses=0]
+ %call = call i32 (...) @x( %struct.anon* %c ) ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll b/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll
index 378d7e7..808f28c 100644
--- a/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll
+++ b/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll
@@ -163,7 +163,7 @@ bb180: ; preds = %bb179, %bb178
br label %bb181
bb181: ; preds = %bb180, %bb170
- %13 = call %struct.rtvec_def* (i32, ...)* @gen_rtvec(i32 1, %struct.rtx_def* null) nounwind ; <%struct.rtvec_def*> [#uses=0]
+ %13 = call %struct.rtvec_def* (i32, ...) @gen_rtvec(i32 1, %struct.rtx_def* null) nounwind ; <%struct.rtvec_def*> [#uses=0]
unreachable
bb211: ; preds = %bb168, %bb167
diff --git a/test/Transforms/GVN/invariant-load.ll b/test/Transforms/GVN/invariant-load.ll
index f126458..982da8c 100644
--- a/test/Transforms/GVN/invariant-load.ll
+++ b/test/Transforms/GVN/invariant-load.ll
@@ -108,7 +108,7 @@ define i32 @test7(i1 %cnd, i32* %p) {
; CHECK-NEXT: ret i32 0
entry:
%v1 = load i32, i32* %p, !invariant.load !0
- call i32* (...)* @bar(i32* %p)
+ call i32* (...) @bar(i32* %p)
%v2 = load i32, i32* %p, !invariant.load !0
%res = sub i32 %v1, %v2
ret i32 %res
diff --git a/test/Transforms/GVN/pre-basic-add.ll b/test/Transforms/GVN/pre-basic-add.ll
index 460d1f9..fa4e2e3 100644
--- a/test/Transforms/GVN/pre-basic-add.ll
+++ b/test/Transforms/GVN/pre-basic-add.ll
@@ -6,7 +6,7 @@
define i32 @test() nounwind {
entry:
%0 = load i32, i32* @H, align 4 ; <i32> [#uses=2]
- %1 = call i32 (...)* @foo() nounwind ; <i32> [#uses=1]
+ %1 = call i32 (...) @foo() nounwind ; <i32> [#uses=1]
%2 = icmp ne i32 %1, 0 ; <i1> [#uses=1]
br i1 %2, label %bb, label %bb1
diff --git a/test/Transforms/GVN/pre-compare.ll b/test/Transforms/GVN/pre-compare.ll
index 52c6b0b..a77684a 100644
--- a/test/Transforms/GVN/pre-compare.ll
+++ b/test/Transforms/GVN/pre-compare.ll
@@ -56,7 +56,7 @@ for.cond: ; preds = %for.cond.backedge,
br i1 %cmp3, label %for.cond.backedge, label %if.end5
if.end5: ; preds = %for.cond
- %call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str3, i64 0, i64 0), i32 %x) nounwind
+ %call6 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str3, i64 0, i64 0), i32 %x) nounwind
br label %for.cond.backedge
for.cond.backedge: ; preds = %if.end5, %for.cond
diff --git a/test/Transforms/GVN/rle-must-alias.ll b/test/Transforms/GVN/rle-must-alias.ll
index 0d181dd..bf8b139 100644
--- a/test/Transforms/GVN/rle-must-alias.ll
+++ b/test/Transforms/GVN/rle-must-alias.ll
@@ -11,19 +11,19 @@ target triple = "i386-apple-darwin7"
define i32 @test(i32 %i) nounwind {
entry:
- %0 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @foo() nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %bb1, label %bb
bb: ; preds = %entry
- %2 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=0]
+ %2 = tail call i32 (...) @bar() nounwind ; <i32> [#uses=0]
%3 = getelementptr [100 x i32], [100 x i32]* @H, i32 0, i32 %i ; <i32*> [#uses=1]
%4 = load i32, i32* %3, align 4 ; <i32> [#uses=1]
store i32 %4, i32* @G, align 4
br label %bb3
bb1: ; preds = %entry
- %5 = tail call i32 (...)* @baz() nounwind ; <i32> [#uses=0]
+ %5 = tail call i32 (...) @baz() nounwind ; <i32> [#uses=0]
%6 = getelementptr [100 x i32], [100 x i32]* @H, i32 0, i32 %i ; <i32*> [#uses=1]
%7 = load i32, i32* %6, align 4 ; <i32> [#uses=2]
store i32 %7, i32* @G, align 4
diff --git a/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll b/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll
index 738ec43..781c57e 100644
--- a/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll
+++ b/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll
@@ -4,7 +4,7 @@
declare i1 ()* @getfunc()
define internal i1 @testfunc() {
- %F = call i1 ()* ()* @getfunc( ) ; <i1 ()*> [#uses=1]
+ %F = call i1 ()* () @getfunc( ) ; <i1 ()*> [#uses=1]
%c = icmp eq i1 ()* %F, @testfunc ; <i1> [#uses=1]
ret i1 %c
}
diff --git a/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll b/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll
index cbc9c75..7a2de55 100644
--- a/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll
+++ b/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll
@@ -11,7 +11,7 @@ define i8 @func() {
entry:
%tmp10 = getelementptr [2 x i32], [2 x i32]* getelementptr ([6 x [2 x i32]], [6 x [2 x i32]]* @aaui1, i32 0, i32 0), i32 5, i32 1 ; <i32*> [#uses=1]
%tmp11 = load i32, i32* %tmp10, align 4 ; <i32> [#uses=1]
- %tmp12 = call i32 (...)* @func3( i32* null, i32 0, i32 %tmp11 ) ; <i32> [#uses=0]
+ %tmp12 = call i32 (...) @func3( i32* null, i32 0, i32 %tmp11 ) ; <i32> [#uses=0]
ret i8 undef
}
diff --git a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
index 8efd018..6933d4a 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
@@ -9,7 +9,7 @@ define internal void @f() {
@a = alias void ()* @f
define void @g() {
- call void()* @a()
+ call void() @a()
ret void
}
@@ -17,7 +17,7 @@ define void @g() {
; CHECK-NOT: @b
define void @h() {
- call void()* @b()
+ call void() @b()
; CHECK: call void @g
ret void
}
diff --git a/test/Transforms/GlobalOpt/blockaddress.ll b/test/Transforms/GlobalOpt/blockaddress.ll
index f7f8308..12e09fc 100644
--- a/test/Transforms/GlobalOpt/blockaddress.ll
+++ b/test/Transforms/GlobalOpt/blockaddress.ll
@@ -13,6 +13,9 @@ define void @f() {
}
define void @g() {
+entry:
+ br label %here
+
; CHECK-LABEL: @g(
here:
diff --git a/test/Transforms/GlobalOpt/constantexpr-dangle.ll b/test/Transforms/GlobalOpt/constantexpr-dangle.ll
index be13a98..fbbfe69 100644
--- a/test/Transforms/GlobalOpt/constantexpr-dangle.ll
+++ b/test/Transforms/GlobalOpt/constantexpr-dangle.ll
@@ -6,7 +6,7 @@ define internal float @foo() {
}
define float @bar() {
- %tmp1 = call float (...)* bitcast (float ()* @foo to float (...)*)( )
+ %tmp1 = call float (...) bitcast (float ()* @foo to float (...)*)( )
%tmp2 = fmul float %tmp1, 1.000000e+01 ; <float> [#uses=1]
ret float %tmp2
}
diff --git a/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll b/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
index a0b1e84..402ae8c 100644
--- a/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
+++ b/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
@@ -31,7 +31,8 @@ loop:
br i1 %cmp, label %loop, label %exit
exit:
- ret i8 %snext
+ %ret = phi i8 [0, %loopguard], [%snext, %loop]
+ ret i8 %ret
}
; CHECK-LABEL: @testptrptr(
@@ -56,7 +57,8 @@ loop:
br i1 %cmp, label %loop, label %exit
exit:
- ret i8 %snext
+ %ret = phi i8 [0, %loopguard], [%snext, %loop]
+ ret i8 %ret
}
; CHECK-LABEL: @testnullptrint(
@@ -86,7 +88,8 @@ loop:
br i1 %cmp, label %loop, label %exit
exit:
- ret i8 %snext
+ %ret = phi i8 [0, %loopguard], [%snext, %loop]
+ ret i8 %ret
}
; CHECK-LABEL: @testptrint(
@@ -116,7 +119,8 @@ loop:
br i1 %cmp, label %loop, label %exit
exit:
- ret i8 %snext
+ %ret = phi i8 [0, %loopguard], [%snext, %loop]
+ ret i8 %ret
}
; IV and BECount have two different pointer types here.
diff --git a/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll b/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll
index 9fb281f..faecbfb 100644
--- a/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll
+++ b/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll
@@ -13,7 +13,7 @@ block9: ; preds = %block9,%func_start
%undef = phi i64 [ %next_undef, %block9 ], [ undef, %func_start ]
%iter = phi i64 [ %next_iter, %block9 ], [ 1, %func_start ]
%next_iter = add nsw i64 %iter, 1
- %0 = tail call i32 (i8*, ...)* @printf(i8* noalias nocapture getelementptr inbounds ([6 x i8], [6 x i8]* @.str3, i64 0, i64 0), i64 %next_iter, i64 %undef)
+ %0 = tail call i32 (i8*, ...) @printf(i8* noalias nocapture getelementptr inbounds ([6 x i8], [6 x i8]* @.str3, i64 0, i64 0), i64 %next_iter, i64 %undef)
%next_undef = add nsw i64 %undef, 1
%_tmp_3 = icmp slt i64 %next_iter, 100
br i1 %_tmp_3, label %block9, label %exit
diff --git a/test/Transforms/IndVarSimplify/eliminate-max.ll b/test/Transforms/IndVarSimplify/eliminate-max.ll
index 125ed74..5831b93 100644
--- a/test/Transforms/IndVarSimplify/eliminate-max.ll
+++ b/test/Transforms/IndVarSimplify/eliminate-max.ll
@@ -45,7 +45,7 @@ bb14: ; preds = %bb11, %bb7
br i1 %t20, label %bb1, label %bb21
bb21: ; preds = %bb14
- %t22 = call i32 (i8*, ...)* @printf(i8* noalias getelementptr inbounds ([4 x i8], [4 x i8]* @0, i32 0, i32 0), i32 %t18) nounwind
+ %t22 = call i32 (i8*, ...) @printf(i8* noalias getelementptr inbounds ([4 x i8], [4 x i8]* @0, i32 0, i32 0), i32 %t18) nounwind
ret i32 0
}
diff --git a/test/Transforms/IndVarSimplify/lftr-udiv-tripcount.ll b/test/Transforms/IndVarSimplify/lftr-udiv-tripcount.ll
new file mode 100644
index 0000000..8a1bb07
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lftr-udiv-tripcount.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; It is okay to do LFTR on this loop even though the trip count is a
+; division because in this case the division can be optimized to a
+; shift.
+
+define void @foo(i8* %a, i8 %n) nounwind uwtable ssp {
+; CHECK-LABEL: @foo(
+ entry:
+ %e = icmp sgt i8 %n, 3
+ br i1 %e, label %loop, label %exit
+
+ loop:
+; CHECK-LABEL: loop:
+ %i = phi i8 [ 0, %entry ], [ %i.inc, %loop ]
+ %i1 = phi i8 [ 0, %entry ], [ %i1.inc, %loop ]
+ %i.inc = add nsw i8 %i, 4
+ %i1.inc = add i8 %i1, 1
+ store volatile i8 0, i8* %a
+ %c = icmp slt i8 %i, %n
+; CHECK-LABEL: %exitcond = icmp ne i8 %i1.inc
+ br i1 %c, label %loop, label %exit
+
+ exit:
+; CHECK-LABEL: exit:
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/sink-alloca.ll b/test/Transforms/IndVarSimplify/sink-alloca.ll
index 8179470..38c2d31 100644
--- a/test/Transforms/IndVarSimplify/sink-alloca.ll
+++ b/test/Transforms/IndVarSimplify/sink-alloca.ll
@@ -42,7 +42,7 @@ entry:
for.body.i:
%indvars.iv37.i = phi i64 [ %indvars.iv.next38.i, %for.body.i ], [ 0, %entry ]
- %call.i = call i8* (...)* @a() nounwind
+ %call.i = call i8* (...) @a() nounwind
%arrayidx.i = getelementptr inbounds i8*, i8** %vla.i, i64 %indvars.iv37.i
store i8* %call.i, i8** %arrayidx.i, align 8
%indvars.iv.next38.i = add i64 %indvars.iv37.i, 1
@@ -51,6 +51,6 @@ for.body.i:
g.exit:
call void @llvm.stackrestore(i8* %savedstack) nounwind
- %call1 = call i8* (...)* @a(i8** %vla) nounwind
+ %call1 = call i8* (...) @a(i8** %vla) nounwind
ret void
}
diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll
index 11c5e16..45c703c 100644
--- a/test/Transforms/IndVarSimplify/udiv.ll
+++ b/test/Transforms/IndVarSimplify/udiv.ll
@@ -119,7 +119,7 @@ for.inc35: ; preds = %for.body15, %for.en
while.end: ; preds = %while.cond.while.end_crit_edge, %while.cond.preheader
%count.0.lcssa = phi i32 [ %count.2.lcssa.lcssa, %while.cond.while.end_crit_edge ], [ 0, %while.cond.preheader ] ; <i32> [#uses=1]
- %call40 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i32 %count.0.lcssa) nounwind ; <i32> [#uses=0]
+ %call40 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i32 %count.0.lcssa) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll b/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
index b8ca560..df0b472 100644
--- a/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
+++ b/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
@@ -9,7 +9,7 @@ LongJmpBlkPost:
ret i32 0
LongJmpBlkPre:
- %i.3 = phi i32 [ 0, %entry ], [ 0, %entry ] ; <i32> [#uses=0]
+ %i.3 = phi i32 [ 0, %entry ]
%exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
cleanup
ret i32 0
diff --git a/test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll b/test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll
index 9af9332..d5416a2 100644
--- a/test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll
+++ b/test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll
@@ -9,7 +9,7 @@ Call2Invoke: ; preds = %entry
br label %exit
LongJmpBlkPre: ; preds = %Call2Invoke, %entry
- %i.3 = phi i32 [ 0, %entry ], [ 0, %Call2Invoke ] ; <i32> [#uses=0]
+ %i.3 = phi i32 [ 0, %entry ]
%exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
cleanup
br label %exit
diff --git a/test/Transforms/Inline/alloca-dbgdeclare.ll b/test/Transforms/Inline/alloca-dbgdeclare.ll
index eb912f5..7e649a2 100644
--- a/test/Transforms/Inline/alloca-dbgdeclare.ll
+++ b/test/Transforms/Inline/alloca-dbgdeclare.ll
@@ -128,7 +128,7 @@ attributes #3 = { noreturn nounwind }
!43 = !{!37, !37, i64 0}
!44 = !{!38, !38, i64 0}
!45 = !MDLocation(line: 9, scope: !15)
-!46 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !"_ZTS1A", inlinedAt: !47)
+!46 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !"_ZTS1A")
!47 = distinct !MDLocation(line: 11, scope: !21)
!48 = !MDExpression(DW_OP_bit_piece, 32, 160)
!49 = !MDLocation(line: 6, scope: !15, inlinedAt: !47)
diff --git a/test/Transforms/Inline/attributes.ll b/test/Transforms/Inline/attributes.ll
index 53fb13f..a97e6a6 100644
--- a/test/Transforms/Inline/attributes.ll
+++ b/test/Transforms/Inline/attributes.ll
@@ -110,3 +110,53 @@ define i32 @test_sanitize_thread(i32 %arg) sanitize_thread {
; CHECK-NEXT: @noattr_callee
; CHECK-NEXT: ret i32
}
+
+; Check that a function doesn't get inlined if target-cpu strings don't match
+; exactly.
+define i32 @test_target_cpu_callee0(i32 %i) "target-cpu"="corei7" {
+ ret i32 %i
+}
+
+define i32 @test_target_cpu0(i32 %i) "target-cpu"="corei7" {
+ %1 = call i32 @test_target_cpu_callee0(i32 %i)
+ ret i32 %1
+; CHECK-LABEL: @test_target_cpu0(
+; CHECK-NOT: @test_target_cpu_callee0
+}
+
+define i32 @test_target_cpu_callee1(i32 %i) "target-cpu"="x86-64" {
+ ret i32 %i
+}
+
+define i32 @test_target_cpu1(i32 %i) "target-cpu"="corei7" {
+ %1 = call i32 @test_target_cpu_callee1(i32 %i)
+ ret i32 %1
+; CHECK-LABEL: @test_target_cpu1(
+; CHECK-NEXT: @test_target_cpu_callee1
+; CHECK-NEXT: ret i32
+}
+
+; Check that a function doesn't get inlined if target-features strings don't
+; match exactly.
+define i32 @test_target_features_callee0(i32 %i) "target-features"="+sse4.2" {
+ ret i32 %i
+}
+
+define i32 @test_target_features0(i32 %i) "target-features"="+sse4.2" {
+ %1 = call i32 @test_target_features_callee0(i32 %i)
+ ret i32 %1
+; CHECK-LABEL: @test_target_features0(
+; CHECK-NOT: @test_target_features_callee0
+}
+
+define i32 @test_target_features_callee1(i32 %i) "target-features"="+avx2" {
+ ret i32 %i
+}
+
+define i32 @test_target_features1(i32 %i) "target-features"="+sse4.2" {
+ %1 = call i32 @test_target_features_callee1(i32 %i)
+ ret i32 %1
+; CHECK-LABEL: @test_target_features1(
+; CHECK-NEXT: @test_target_features_callee1
+; CHECK-NEXT: ret i32
+}
diff --git a/test/Transforms/Inline/devirtualize-2.ll b/test/Transforms/Inline/devirtualize-2.ll
index b7eb1be..cca4e75 100644
--- a/test/Transforms/Inline/devirtualize-2.ll
+++ b/test/Transforms/Inline/devirtualize-2.ll
@@ -2,7 +2,7 @@
; PR4834
define i32 @test1() {
- %funcall1_ = call fastcc i32 ()* ()* @f1()
+ %funcall1_ = call fastcc i32 ()* () @f1()
%executecommandptr1_ = call i32 %funcall1_()
ret i32 %executecommandptr1_
}
diff --git a/test/Transforms/Inline/frameescape.ll b/test/Transforms/Inline/frameescape.ll
new file mode 100644
index 0000000..fb33602
--- /dev/null
+++ b/test/Transforms/Inline/frameescape.ll
@@ -0,0 +1,44 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+
+; PR23216: We can't inline functions using llvm.frameescape.
+
+declare void @llvm.frameescape(...)
+declare i8* @llvm.frameaddress(i32)
+declare i8* @llvm.framerecover(i8*, i8*, i32)
+
+define internal void @foo(i8* %fp) {
+ %a.i8 = call i8* @llvm.framerecover(i8* bitcast (i32 ()* @bar to i8*), i8* %fp, i32 0)
+ %a = bitcast i8* %a.i8 to i32*
+ store i32 42, i32* %a
+ ret void
+}
+
+define internal i32 @bar() {
+entry:
+ %a = alloca i32
+ call void (...) @llvm.frameescape(i32* %a)
+ %fp = call i8* @llvm.frameaddress(i32 0)
+ tail call void @foo(i8* %fp)
+ %r = load i32, i32* %a
+ ret i32 %r
+}
+
+; We even bail when someone marks it alwaysinline.
+define internal i32 @bar_alwaysinline() alwaysinline {
+entry:
+ %a = alloca i32
+ call void (...) @llvm.frameescape(i32* %a)
+ tail call void @foo(i8* null)
+ ret i32 0
+}
+
+define i32 @bazz() {
+entry:
+ %r = tail call i32 @bar()
+ %r1 = tail call i32 @bar_alwaysinline()
+ ret i32 %r
+}
+
+; CHECK: define i32 @bazz()
+; CHECK: call i32 @bar()
+; CHECK: call i32 @bar_alwaysinline()
diff --git a/test/Transforms/Inline/ignore-debug-info.ll b/test/Transforms/Inline/ignore-debug-info.ll
index 3e83229..4cbd2fa 100644
--- a/test/Transforms/Inline/ignore-debug-info.ll
+++ b/test/Transforms/Inline/ignore-debug-info.ll
@@ -12,11 +12,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
define <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b) {
entry:
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !MDExpression())
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !MDExpression()), !dbg !MDLocation(scope: !6)
%mul = fmul <4 x float> %a, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !MDExpression())
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !MDExpression()), !dbg !MDLocation(scope: !6)
%mul1 = fmul <4 x float> %b, <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !MDExpression())
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !MDExpression()), !dbg !MDLocation(scope: !6)
%add = fadd <4 x float> %mul, %mul1
ret <4 x float> %add
}
@@ -27,10 +27,10 @@ define float @outer_vectors(<4 x float> %a, <4 x float> %b) {
; CHECK: ret float
entry:
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !MDExpression())
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !MDExpression())
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !MDExpression()), !dbg !MDLocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !MDExpression()), !dbg !MDLocation(scope: !6)
%call = call <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b)
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !MDExpression())
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !MDExpression()), !dbg !MDLocation(scope: !6)
%vecext = extractelement <4 x float> %call, i32 0
%vecext1 = extractelement <4 x float> %call, i32 1
%add = fadd float %vecext, %vecext1
@@ -47,7 +47,7 @@ attributes #0 = { nounwind readnone }
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}
-!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !6, globals: !2, imports: !2)
+!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !{!6}, globals: !2, imports: !2)
!1 = !MDFile(filename: "test.c", directory: "")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/Transforms/Inline/inline-musttail-varargs.ll b/test/Transforms/Inline/inline-musttail-varargs.ll
index e9ce660..e93ef76 100644
--- a/test/Transforms/Inline/inline-musttail-varargs.ll
+++ b/test/Transforms/Inline/inline-musttail-varargs.ll
@@ -7,16 +7,16 @@ declare void @ext_method(i8*, i32)
define linkonce_odr void @thunk(i8* %this, ...) {
%this_adj = getelementptr i8, i8* %this, i32 4
- musttail call void (i8*, ...)* bitcast (void (i8*, i32)* @ext_method to void (i8*, ...)*)(i8* %this_adj, ...)
+ musttail call void (i8*, ...) bitcast (void (i8*, i32)* @ext_method to void (i8*, ...)*)(i8* %this_adj, ...)
ret void
}
define void @thunk_caller(i8* %p) {
- call void (i8*, ...)* @thunk(i8* %p, i32 42)
+ call void (i8*, ...) @thunk(i8* %p, i32 42)
ret void
}
; CHECK-LABEL: define void @thunk_caller(i8* %p)
-; CHECK: call void (i8*, ...)* @thunk(i8* %p, i32 42)
+; CHECK: call void (i8*, ...) @thunk(i8* %p, i32 42)
; FIXME: Inline the thunk. This should be significantly easier than inlining
; general varargs functions.
diff --git a/test/Transforms/Inline/inline_dbg_declare.ll b/test/Transforms/Inline/inline_dbg_declare.ll
index 4dc1028..e34a43b 100644
--- a/test/Transforms/Inline/inline_dbg_declare.ll
+++ b/test/Transforms/Inline/inline_dbg_declare.ll
@@ -94,6 +94,6 @@ attributes #1 = { nounwind readnone }
; CHECK: [[FOO:![0-9]+]] = !MDSubprogram(name: "foo",
; CHECK: [[BAR:![0-9]+]] = !MDSubprogram(name: "bar",
+; CHECK: [[m23]] = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1, scope: [[FOO]]
; CHECK: [[CALL_SITE:![0-9]+]] = distinct !MDLocation(line: 8, column: 14, scope: [[BAR]])
-; CHECK: [[m23]] = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1, scope: [[FOO]],{{.*}} inlinedAt: [[CALL_SITE]])
; CHECK: [[m24]] = !MDLocation(line: 1, column: 17, scope: [[FOO]], inlinedAt: [[CALL_SITE]])
diff --git a/test/Transforms/Inline/inline_ssp.ll b/test/Transforms/Inline/inline_ssp.ll
index c9675ab..1a07723 100644
--- a/test/Transforms/Inline/inline_ssp.ll
+++ b/test/Transforms/Inline/inline_ssp.ll
@@ -13,25 +13,25 @@
; sspreq > sspstrong > ssp > [no ssp]
define internal void @fun_sspreq() nounwind sspreq uwtable {
entry:
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str3, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str3, i32 0, i32 0))
ret void
}
define internal void @fun_sspstrong() nounwind sspstrong uwtable {
entry:
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str2, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str2, i32 0, i32 0))
ret void
}
define internal void @fun_ssp() nounwind ssp uwtable {
entry:
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str1, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str1, i32 0, i32 0))
ret void
}
define internal void @fun_nossp() nounwind uwtable {
entry:
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0))
ret void
}
diff --git a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
index bb9a818..50b9fdb 100644
--- a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
+++ b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
@@ -12,7 +12,7 @@ entry:
br i1 %tmp.1, label %then, label %UnifiedExitNode
then: ; preds = %entry
- %tmp.4 = call i32 (...)* @bitmap_clear( i32* %live_head ) ; <i32> [#uses=0]
+ %tmp.4 = call i32 (...) @bitmap_clear( i32* %live_head ) ; <i32> [#uses=0]
br label %UnifiedExitNode
UnifiedExitNode: ; preds = %then, %entry
diff --git a/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll b/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll
index c1692f7..aff39f8 100644
--- a/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll
+++ b/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll
@@ -7,7 +7,7 @@ declare void @foo(...)
define void @test(i64 %X) {
%Y = inttoptr i64 %X to i32* ; <i32*> [#uses=1]
- call void (...)* @foo( i32* %Y )
+ call void (...) @foo( i32* %Y )
ret void
}
diff --git a/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll b/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
index 20bbd28..113ada3 100644
--- a/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
+++ b/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
@@ -10,7 +10,7 @@ define i32 @test2(i32 %C) {
entry:
%A = alloca i32
%B = alloca i32
- %tmp = call i32 (...)* @bar( i32* %A ) ; <i32> [#uses=0]
+ %tmp = call i32 (...) @bar( i32* %A ) ; <i32> [#uses=0]
%T = load i32, i32* %A ; <i32> [#uses=1]
%tmp2 = icmp eq i32 %C, 0 ; <i1> [#uses=1]
br i1 %tmp2, label %cond_next, label %cond_true
@@ -23,20 +23,20 @@ cond_true: ; preds = %entry
cond_next: ; preds = %cond_true, %entry
%tmp1.0 = phi i32 [ %T1, %cond_true ], [ %T, %entry ] ; <i32> [#uses=1]
- %tmp7 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp8 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp9 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp10 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp11 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp12 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp13 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp14 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp15 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp16 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp17 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp18 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp19 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
- %tmp20 = call i32 (...)* @baq( ) ; <i32> [#uses=0]
+ %tmp7 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp8 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp9 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp10 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp11 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp12 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp15 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp16 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp18 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp19 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp20 = call i32 (...) @baq( ) ; <i32> [#uses=0]
ret i32 %tmp1.0
}
diff --git a/test/Transforms/InstCombine/2007-02-07-PointerCast.ll b/test/Transforms/InstCombine/2007-02-07-PointerCast.ll
index 760b6dc..ddc1e03 100644
--- a/test/Transforms/InstCombine/2007-02-07-PointerCast.ll
+++ b/test/Transforms/InstCombine/2007-02-07-PointerCast.ll
@@ -16,7 +16,7 @@ entry:
%tmp1 = load i8*, i8** %a ; <i8*> [#uses=1]
%tmp2 = ptrtoint i8* %tmp1 to i32 ; <i32> [#uses=1]
%tmp3 = zext i32 %tmp2 to i64 ; <i64> [#uses=1]
- %tmp.upgrd.1 = call i32 (i8*, ...)* @printf( i8* %tmp, i64 %tmp3 ) ; <i32> [#uses=0]
+ %tmp.upgrd.1 = call i32 (i8*, ...) @printf( i8* %tmp, i64 %tmp3 ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll b/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
index 04a5aaa..1232005 100644
--- a/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
+++ b/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
@@ -5,7 +5,7 @@
define i32 @main(i32 %argc, i8** %argv) {
entry:
- %tmp32 = tail call i32 (i8* , ...) * bitcast (i32 (i8*, ...) * @printf to i32 (i8* , ...) *)( i8* getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0) , i32 0 ) nounwind ; <i32> [#uses=0]
+ %tmp32 = tail call i32 (i8* , ...) bitcast (i32 (i8*, ...) * @printf to i32 (i8* , ...) *)( i8* getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0) , i32 0 ) nounwind ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll b/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll
index 46d0694..b111b85 100644
--- a/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll
+++ b/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll
@@ -21,6 +21,6 @@ entry:
%tmp7 = getelementptr %struct.FRAME.nest, %struct.FRAME.nest* %FRAME.0, i32 0, i32 1 ; <i32 (...)**> [#uses=1]
%tmp89 = bitcast i8* %tramp to i32 (...)* ; <i32 (...)*> [#uses=2]
store i32 (...)* %tmp89, i32 (...)** %tmp7, align 8
- %tmp2.i = call i32 (...)* %tmp89( i32 zeroext 0 ) ; <i32> [#uses=1]
+ %tmp2.i = call i32 (...) %tmp89( i32 zeroext 0 ) ; <i32> [#uses=1]
ret i32 %tmp2.i
}
diff --git a/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll b/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll
index aa38065..1ea0998 100644
--- a/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll
+++ b/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll
@@ -8,7 +8,7 @@ target triple = "i386-apple-darwin9"
define void @foo(i8* %context) nounwind {
entry:
%tmp1 = bitcast i8* %context to %struct.NSRect* ; <%struct.NSRect*> [#uses=1]
- call void (i32, ...)* @bar( i32 3, %struct.NSRect* byval align 4 %tmp1 ) nounwind
+ call void (i32, ...) @bar( i32 3, %struct.NSRect* byval align 4 %tmp1 ) nounwind
ret void
}
diff --git a/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll b/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll
index e007e6f..9073820 100644
--- a/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll
+++ b/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll
@@ -14,7 +14,7 @@ entry:
%tmp7 = call i32 @strlen( i8* %tmp1 ) nounwind readonly ; <i32> [#uses=1]
%tmp9 = getelementptr i8, i8* %tmp1, i32 0 ; <i8*> [#uses=1]
store i8 0, i8* %tmp9, align 1
- %tmp11 = call i32 (...)* @b( i8* %tmp1 ) nounwind ; <i32> [#uses=0]
+ %tmp11 = call i32 (...) @b( i8* %tmp1 ) nounwind ; <i32> [#uses=0]
ret i32 %tmp7
}
diff --git a/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll b/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
index af0ffeb..ce19233 100644
--- a/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
+++ b/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
@@ -18,7 +18,7 @@ entry:
%tmp7 = call i32 @strlen( i8* %tmp1 ) nounwind readonly ; <i32> [#uses=1]
%tmp9 = getelementptr i8, i8* %tmp1, i32 0 ; <i8*> [#uses=1]
store i8 0, i8* %tmp9, align 1
- %tmp11 = call i32 (...)* @b( i8* %tmp1 ) nounwind ; <i32> [#uses=0]
+ %tmp11 = call i32 (...) @b( i8* %tmp1 ) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
diff --git a/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll b/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll
index ec94623..7e8341b 100644
--- a/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll
+++ b/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll
@@ -7,28 +7,28 @@ entry:
bb: ; preds = %bb16, %entry
%i.0 = phi i32 [ 0, %entry ], [ %indvar.next, %somebb ] ; <i32> [#uses=1]
%x.0 = phi i32 [ 37, %entry ], [ %tmp17, %somebb ] ; <i32> [#uses=1]
- %tmp = tail call i32 (...)* @bork( ) nounwind ; <i32> [#uses=0]
- %tmp1 = tail call i32 (...)* @bork( ) nounwind ; <i32> [#uses=0]
- %tmp2 = tail call i32 (...)* @bork( ) nounwind ; <i32> [#uses=1]
+ %tmp = tail call i32 (...) @bork( ) nounwind ; <i32> [#uses=0]
+ %tmp1 = tail call i32 (...) @bork( ) nounwind ; <i32> [#uses=0]
+ %tmp2 = tail call i32 (...) @bork( ) nounwind ; <i32> [#uses=1]
%tmp3 = icmp eq i32 %tmp2, 0 ; <i1> [#uses=1]
br i1 %tmp3, label %bb7, label %bb5
bb5: ; preds = %bb
- %tmp6 = tail call i32 (...)* @bork( ) nounwind ; <i32> [#uses=0]
+ %tmp6 = tail call i32 (...) @bork( ) nounwind ; <i32> [#uses=0]
br label %bb7
bb7: ; preds = %bb5, %bb
- %tmp8 = tail call i32 (...)* @bork( ) nounwind ; <i32> [#uses=0]
- %tmp9 = tail call i32 (...)* @bork( ) nounwind ; <i32> [#uses=0]
+ %tmp8 = tail call i32 (...) @bork( ) nounwind ; <i32> [#uses=0]
+ %tmp9 = tail call i32 (...) @bork( ) nounwind ; <i32> [#uses=0]
%tmp11 = icmp eq i32 %x.0, 37 ; <i1> [#uses=1]
br i1 %tmp11, label %bb14, label %bb16
bb14: ; preds = %bb7
- %tmp15 = tail call i32 (...)* @bar( ) nounwind ; <i32> [#uses=0]
+ %tmp15 = tail call i32 (...) @bar( ) nounwind ; <i32> [#uses=0]
br label %bb16
bb16: ; preds = %bb14, %bb7
- %tmp17 = tail call i32 (...)* @zap( ) nounwind ; <i32> [#uses=1]
+ %tmp17 = tail call i32 (...) @zap( ) nounwind ; <i32> [#uses=1]
%indvar.next = add i32 %i.0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, 42 ; <i1> [#uses=1]
br i1 %exitcond, label %return, label %somebb
diff --git a/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll b/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
index 23ed5aa..9994b58 100644
--- a/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
+++ b/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
@@ -20,7 +20,7 @@ entry:
%3 = getelementptr %struct.Key, %struct.Key* %iospec, i32 0, i32 0 ; <{ i32, i32 }*> [#uses=1]
%4 = bitcast { i32, i32 }* %3 to i64* ; <i64*> [#uses=1]
store i64 %key_token2, i64* %4, align 4
- %5 = call i32 (...)* @foo(%struct.Key* byval align 4 %iospec, i32* %ret) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (...) @foo(%struct.Key* byval align 4 %iospec, i32* %ret) nounwind ; <i32> [#uses=0]
%6 = load i32, i32* %ret, align 4 ; <i32> [#uses=1]
ret i32 %6
}
diff --git a/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll b/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
index cb7431b..b9aa0a2 100644
--- a/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
+++ b/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
@@ -20,7 +20,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -41,7 +41,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -60,7 +60,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -79,7 +79,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -98,7 +98,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -117,7 +117,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -136,7 +136,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -155,7 +155,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -174,7 +174,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -193,7 +193,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -212,7 +212,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -231,7 +231,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -250,7 +250,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -269,7 +269,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -288,7 +288,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -307,7 +307,7 @@ entry:
%2 = load float, float* %x, align 4 ; <float> [#uses=1]
%3 = fpext float %2 to double ; <double> [#uses=1]
%4 = frem double %3, %1 ; <double> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
diff --git a/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll b/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
index 29ceb6d..5161069 100644
--- a/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
+++ b/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-unknown-linux-gnu"
define void @CopyEventArg(%union.anon* %ev) nounwind {
entry:
- %call = call i32 (i8*, i8*, ...)* @sprintf(i8* undef, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), %union.anon* %ev) nounwind
+ %call = call i32 (i8*, i8*, ...) @sprintf(i8* undef, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), %union.anon* %ev) nounwind
; CHECK: bitcast %union.anon* %ev to i8*
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
ret void
diff --git a/test/Transforms/InstCombine/2012-02-13-FCmp.ll b/test/Transforms/InstCombine/2012-02-13-FCmp.ll
index 3c4c6de..586f86d 100644
--- a/test/Transforms/InstCombine/2012-02-13-FCmp.ll
+++ b/test/Transforms/InstCombine/2012-02-13-FCmp.ll
@@ -6,9 +6,9 @@
declare i32 @printf(i8*, ...)
define i64 @_Z8tempCastj(i32 %val) uwtable ssp {
entry:
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str1, i64 0, i64 0), i32 %val)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str1, i64 0, i64 0), i32 %val)
%conv = uitofp i32 %val to double
- %call.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str, i64 0, i64 0), double %conv)
+ %call.i = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str, i64 0, i64 0), double %conv)
%cmp.i = fcmp oge double %conv, -1.000000e+00
br i1 %cmp.i, label %land.rhs.i, label %if.end.critedge
; CHECK: br i1 true, label %land.rhs.i, label %if.end.critedge
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index 81e2083..b61b75e 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -13,14 +13,14 @@ declare void @use(...)
; CHECK-NOT: alloca
define void @test() {
%X = alloca [0 x i32] ; <[0 x i32]*> [#uses=1]
- call void (...)* @use( [0 x i32]* %X )
+ call void (...) @use( [0 x i32]* %X )
%Y = alloca i32, i32 0 ; <i32*> [#uses=1]
- call void (...)* @use( i32* %Y )
+ call void (...) @use( i32* %Y )
%Z = alloca { } ; <{ }*> [#uses=1]
- call void (...)* @use( { }* %Z )
+ call void (...) @use( { }* %Z )
%size = load i32, i32* @int
%A = alloca {{}}, i32 %size
- call void (...)* @use( {{}}* %A )
+ call void (...) @use( {{}}* %A )
ret void
}
@@ -126,7 +126,7 @@ define void @test8() {
; NODL: alloca [100 x i32]
; NODL: getelementptr inbounds [100 x i32], [100 x i32]* %x1, i64 0, i64 0
%x = alloca i32, i32 100
- call void (...)* @use(i32* %x)
+ call void (...) @use(i32* %x)
ret void
}
@@ -160,6 +160,6 @@ entry:
%v32 = alloca i1, align 8
%v64 = alloca i1, i64 1, align 8
%v33 = alloca i1, i33 1, align 8
- call void (...)* @use(i1* %v32, i1* %v64, i1* %v33)
+ call void (...) @use(i1* %v32, i1* %v64, i1* %v33)
ret void
}
diff --git a/test/Transforms/InstCombine/call-intrinsics.ll b/test/Transforms/InstCombine/call-intrinsics.ll
index f9d1080..3e37a71 100644
--- a/test/Transforms/InstCombine/call-intrinsics.ll
+++ b/test/Transforms/InstCombine/call-intrinsics.ll
@@ -3,17 +3,17 @@
@X = global i8 0 ; <i8*> [#uses=3]
@Y = global i8 12 ; <i8*> [#uses=2]
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
define void @zero_byte_test() {
; These process zero bytes, so they are a noop.
- call void @llvm.memmove.i32( i8* @X, i8* @Y, i32 0, i32 100 )
- call void @llvm.memcpy.i32( i8* @X, i8* @Y, i32 0, i32 100 )
- call void @llvm.memset.i32( i8* @X, i8 123, i32 0, i32 100 )
+ call void @llvm.memmove.p0i8.p0i8.i32( i8* @X, i8* @Y, i32 0, i32 128, i1 false )
+ call void @llvm.memcpy.p0i8.p0i8.i32( i8* @X, i8* @Y, i32 0, i32 128, i1 false )
+ call void @llvm.memset.p0i8.i32( i8* @X, i8 123, i32 0, i32 128, i1 false )
ret void
}
diff --git a/test/Transforms/InstCombine/call.ll b/test/Transforms/InstCombine/call.ll
index e68c0ad..47ae71f 100644
--- a/test/Transforms/InstCombine/call.ll
+++ b/test/Transforms/InstCombine/call.ll
@@ -61,7 +61,7 @@ define void @test3a(i8, ...) {unreachable }
define void @test3(i8 %A, i8 %B) {
; CHECK-LABEL: @test3(
; CHECK: %1 = zext i8 %B to i32
-; CHECK: call void (i8, ...)* @test3a(i8 %A, i32 %1)
+; CHECK: call void (i8, ...) @test3a(i8 %A, i32 %1)
; CHECK: ret void
call void bitcast (void (i8, ...)* @test3a to void (i8, i8)*)( i8 %A, i8 %B)
ret void
diff --git a/test/Transforms/InstCombine/call2.ll b/test/Transforms/InstCombine/call2.ll
index 467eb07..70a5b3c 100644
--- a/test/Transforms/InstCombine/call2.ll
+++ b/test/Transforms/InstCombine/call2.ll
@@ -4,7 +4,7 @@
define i32 @bar() {
entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
- %tmp = call i32 (...)* bitcast (i32 (i8*)* @f to i32 (...)*)( double 3.000000e+00 ) ; <i32> [#uses=0]
+ %tmp = call i32 (...) bitcast (i32 (i8*)* @f to i32 (...)*)( double 3.000000e+00 ) ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index c577501..c96140e 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -93,9 +93,9 @@ declare void @varargs(i32, ...)
define void @test11(i32* %P) {
%c = bitcast i32* %P to i16* ; <i16*> [#uses=1]
- call void (i32, ...)* @varargs( i32 5, i16* %c )
+ call void (i32, ...) @varargs( i32 5, i16* %c )
ret void
-; CHECK: call void (i32, ...)* @varargs(i32 5, i32* %P)
+; CHECK: call void (i32, ...) @varargs(i32 5, i32* %P)
; CHECK: ret void
}
diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index a36487a..8e335b8 100644
--- a/test/Transforms/InstCombine/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -5,7 +5,7 @@
define void @foo() nounwind ssp {
;CHECK: call i32 @putchar{{.+}} !dbg
- %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 97), !dbg !5
+ %1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 97), !dbg !5
ret void, !dbg !7
}
@@ -17,7 +17,7 @@ declare i32 @printf(i8*, ...)
!0 = !MDSubprogram(name: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !1, type: !3, function: void ()* @foo)
!1 = !MDFile(filename: "m.c", directory: "/private/tmp")
-!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !8, enums: !4, retainedTypes: !4, subprograms: !9)
+!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !8, enums: !{}, retainedTypes: !{}, subprograms: !9)
!3 = !MDSubroutineType(types: !4)
!4 = !{null}
!5 = !MDLocation(line: 5, column: 2, scope: !6)
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index ee02c89..0a2ea0e 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -55,5 +55,5 @@ entry:
!26 = !MDFile(filename: "bits.c", directory: "Game")
!27 = !MDFile(filename: "string.h", directory: "Game")
!28 = !MDFile(filename: "bits.c", directory: "Game")
-!29 = !{i32 0}
+!29 = !{}
!30 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/InstCombine/err-rep-cold.ll b/test/Transforms/InstCombine/err-rep-cold.ll
index e4399ab..6c4e4e1 100644
--- a/test/Transforms/InstCombine/err-rep-cold.ll
+++ b/test/Transforms/InstCombine/err-rep-cold.ll
@@ -19,10 +19,10 @@ entry:
if.then: ; preds = %entry
%0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
- %call = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), i32 %a) #1
+ %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), i32 %a) #1
br label %return
-; CHECK: %call = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), i32 %a) #[[AT1:[0-9]+]]
+; CHECK: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), i32 %a) #[[AT1:[0-9]+]]
return: ; preds = %entry, %if.then
%retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll
index 32203b2..6d21f39 100644
--- a/test/Transforms/InstCombine/fprintf-1.ll
+++ b/test/Transforms/InstCombine/fprintf-1.ll
@@ -20,7 +20,7 @@ declare i32 @fprintf(%FILE*, i8*, ...)
define void @test_simplify1(%FILE* %fp) {
; CHECK-LABEL: @test_simplify1(
%fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
- call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt)
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt)
; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp)
ret void
; CHECK-NEXT: ret void
@@ -31,7 +31,7 @@ define void @test_simplify1(%FILE* %fp) {
define void @test_simplify2(%FILE* %fp) {
; CHECK-LABEL: @test_simplify2(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_c, i32 0, i32 0
- call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i8 104)
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt, i8 104)
; CHECK-NEXT: call i32 @fputc(i32 104, %FILE* %fp)
ret void
; CHECK-NEXT: ret void
@@ -44,7 +44,7 @@ define void @test_simplify3(%FILE* %fp) {
; CHECK-LABEL: @test_simplify3(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_s, i32 0, i32 0
%str = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
- call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i8* %str)
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt, i8* %str)
; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp)
ret void
; CHECK-NEXT: ret void
@@ -55,8 +55,8 @@ define void @test_simplify3(%FILE* %fp) {
define void @test_simplify4(%FILE* %fp) {
; CHECK-IPRINTF-LABEL: @test_simplify4(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_d, i32 0, i32 0
- call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i32 187)
-; CHECK-IPRINTF-NEXT: call i32 (%FILE*, i8*, ...)* @fiprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt, i32 187)
+; CHECK-IPRINTF-NEXT: call i32 (%FILE*, i8*, ...) @fiprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_d, i32 0, i32 0), i32 187)
ret void
; CHECK-IPRINTF-NEXT: ret void
}
@@ -64,8 +64,8 @@ define void @test_simplify4(%FILE* %fp) {
define void @test_no_simplify1(%FILE* %fp) {
; CHECK-IPRINTF-LABEL: @test_no_simplify1(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_f, i32 0, i32 0
- call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, double 1.87)
-; CHECK-IPRINTF-NEXT: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt, double 1.87)
+; CHECK-IPRINTF-NEXT: call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
ret void
; CHECK-IPRINTF-NEXT: ret void
}
@@ -73,8 +73,8 @@ define void @test_no_simplify1(%FILE* %fp) {
define void @test_no_simplify2(%FILE* %fp, double %d) {
; CHECK-LABEL: @test_no_simplify2(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_f, i32 0, i32 0
- call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, double %d)
-; CHECK-NEXT: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_f, i32 0, i32 0), double %d)
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_f, i32 0, i32 0), double %d)
ret void
; CHECK-NEXT: ret void
}
@@ -82,8 +82,8 @@ define void @test_no_simplify2(%FILE* %fp, double %d) {
define i32 @test_no_simplify3(%FILE* %fp) {
; CHECK-LABEL: @test_no_simplify3(
%fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
- %1 = call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt)
-; CHECK-NEXT: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0))
+ %1 = call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt)
+; CHECK-NEXT: call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0))
ret i32 %1
; CHECK-NEXT: ret i32 %1
}
diff --git a/test/Transforms/InstCombine/gc.relocate.ll b/test/Transforms/InstCombine/gc.relocate.ll
index 4a7ea2c..5ecde19 100644
--- a/test/Transforms/InstCombine/gc.relocate.ll
+++ b/test/Transforms/InstCombine/gc.relocate.ll
@@ -9,13 +9,13 @@ declare zeroext i1 @return_i1()
declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32)
-define i32 addrspace(1)* @deref(i32 addrspace(1)* dereferenceable(8) %dparam) {
+define i32 addrspace(1)* @deref(i32 addrspace(1)* dereferenceable(8) %dparam) gc "statepoint-example" {
; Checks that a dereferenceabler pointer
; CHECK-LABEL: @deref
; CHECK: call dereferenceable(8)
entry:
%load = load i32, i32 addrspace(1)* %dparam
- %tok = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
+ %tok = tail call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
%relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %tok, i32 4, i32 4)
ret i32 addrspace(1)* %relocate
-}
\ No newline at end of file
+}
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 2e605fb..9bb1b12 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -460,7 +460,7 @@ bb10:
%tmp.0.reg2mem.0.rec = mul i32 %indvar, -1
%tmp12.rec = add i32 %tmp.0.reg2mem.0.rec, -1
%tmp12 = getelementptr inbounds %struct.x, %struct.x* %tmp45, i32 %tmp12.rec
- %tmp16 = call i32 (i8*, ...)* @printf( i8* getelementptr ([12 x i8], [12 x i8]* @.str1, i32 0, i32 0), %struct.x* %tmp12 ) nounwind
+ %tmp16 = call i32 (i8*, ...) @printf( i8* getelementptr ([12 x i8], [12 x i8]* @.str1, i32 0, i32 0), %struct.x* %tmp12 ) nounwind
%tmp84 = icmp eq %struct.x* %tmp12, %orientations62
%indvar.next = add i32 %indvar, 1
br i1 %tmp84, label %bb17, label %bb10
@@ -617,11 +617,11 @@ entry:
; Instcombine should be able to fold this getelementptr.
define i32 @test35() nounwind {
- call i32 (i8*, ...)* @printf(i8* getelementptr ([17 x i8], [17 x i8]* @"\01LC8", i32 0, i32 0),
+ call i32 (i8*, ...) @printf(i8* getelementptr ([17 x i8], [17 x i8]* @"\01LC8", i32 0, i32 0),
i8* getelementptr (%t1, %t1* bitcast (%t0* @s to %t1*), i32 0, i32 1, i32 0)) nounwind
ret i32 0
; CHECK-LABEL: @test35(
-; CHECK: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0, %t0* @s, i64 0, i32 1, i64 0)) [[NUW:#[0-9]+]]
+; CHECK: call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0, %t0* @s, i64 0, i32 1, i64 0)) [[NUW:#[0-9]+]]
}
; Instcombine should constant-fold the GEP so that indices that have
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 539628a..f9ccf51 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -407,3 +407,13 @@ entry:
%obit = extractvalue %ov.result.32 %t, 1
ret i1 %obit
}
+
+define %ov.result.32 @ssubtest_reorder(i8 %a) {
+ %A = sext i8 %a to i32
+ %x = call %ov.result.32 @llvm.ssub.with.overflow.i32(i32 0, i32 %A)
+ ret %ov.result.32 %x
+; CHECK-LABEL: @ssubtest_reorder
+; CHECK: %x = sub nsw i32 0, %A
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
diff --git a/test/Transforms/InstCombine/objsize-noverify.ll b/test/Transforms/InstCombine/objsize-noverify.ll
new file mode 100644
index 0000000..7e469bd
--- /dev/null
+++ b/test/Transforms/InstCombine/objsize-noverify.ll
@@ -0,0 +1,43 @@
+; Test objectsize bounds checking that won't verify until after -instcombine.
+; RUN: opt < %s -disable-verify -instcombine -S | opt -S | FileCheck %s
+; We need target data to get the sizes of the arrays and structures.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
+
+; CHECK-LABEL: @PR13390(
+define i32 @PR13390(i1 %bool, i8* %a) {
+entry:
+ %cond = or i1 %bool, true
+ br i1 %cond, label %return, label %xpto
+
+xpto:
+ %select = select i1 %bool, i8* %select, i8* %a
+ %select2 = select i1 %bool, i8* %a, i8* %select2
+ %0 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select, i1 true)
+ %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select2, i1 true)
+ %2 = add i32 %0, %1
+; CHECK: ret i32 undef
+ ret i32 %2
+
+return:
+ ret i32 42
+}
+
+; CHECK-LABEL: @PR13621(
+define i32 @PR13621(i1 %bool) nounwind {
+entry:
+ %cond = or i1 %bool, true
+ br i1 %cond, label %return, label %xpto
+
+; technically reachable, but this malformed IR may appear as a result of constant propagation
+xpto:
+ %gep2 = getelementptr i8, i8* %gep, i32 1
+ %gep = getelementptr i8, i8* %gep2, i32 1
+ %o = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 true)
+; CHECK: ret i32 undef
+ ret i32 %o
+
+return:
+ ret i32 7
+}
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 3125458..335a816 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -219,43 +219,6 @@ define i32 @test13(i8** %esc) {
ret i32 %1
}
-; CHECK-LABEL: @PR13390(
-define i32 @PR13390(i1 %bool, i8* %a) {
-entry:
- %cond = or i1 %bool, true
- br i1 %cond, label %return, label %xpto
-
-xpto:
- %select = select i1 %bool, i8* %select, i8* %a
- %select2 = select i1 %bool, i8* %a, i8* %select2
- %0 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select, i1 true)
- %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select2, i1 true)
- %2 = add i32 %0, %1
-; CHECK: ret i32 undef
- ret i32 %2
-
-return:
- ret i32 42
-}
-
-; CHECK-LABEL: @PR13621(
-define i32 @PR13621(i1 %bool) nounwind {
-entry:
- %cond = or i1 %bool, true
- br i1 %cond, label %return, label %xpto
-
-; technically reachable, but this malformed IR may appear as a result of constant propagation
-xpto:
- %gep2 = getelementptr i8, i8* %gep, i32 1
- %gep = getelementptr i8, i8* %gep2, i32 1
- %o = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 true)
-; CHECK: ret i32 undef
- ret i32 %o
-
-return:
- ret i32 7
-}
-
@globalalias = internal alias [60 x i8]* @a
; CHECK-LABEL: @test18(
diff --git a/test/Transforms/InstCombine/osx-names.ll b/test/Transforms/InstCombine/osx-names.ll
index ed379c5..04d842d 100644
--- a/test/Transforms/InstCombine/osx-names.ll
+++ b/test/Transforms/InstCombine/osx-names.ll
@@ -16,14 +16,14 @@ target triple = "i386-apple-macosx10.7.2"
define void @test1(%struct.__sFILE* %stream) nounwind {
; CHECK-LABEL: define void @test1(
; CHECK: call i32 @"fwrite$UNIX2003"
- %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0)) nounwind
+ %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0)) nounwind
ret void
}
define void @test2(%struct.__sFILE* %stream, i8* %str) nounwind ssp {
; CHECK-LABEL: define void @test2(
; CHECK: call i32 @"fputs$UNIX2003"
- %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str2, i32 0, i32 0), i8* %str) nounwind
+ %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str2, i32 0, i32 0), i8* %str) nounwind
ret void
}
diff --git a/test/Transforms/InstCombine/overflow.ll b/test/Transforms/InstCombine/overflow.ll
index 3eddc80..4c0a8a2 100644
--- a/test/Transforms/InstCombine/overflow.ll
+++ b/test/Transforms/InstCombine/overflow.ll
@@ -97,39 +97,6 @@ if.end: ; preds = %entry
; CHECK: ret i8
}
-; CHECK-LABEL: @test5(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test5(i64 %a, i64 %b) nounwind ssp {
-entry:
- %add = add i64 %b, %a
- %cmp = icmp ult i64 %add, %a
- %Q = select i1 %cmp, i64 %b, i64 42
- ret i64 %Q
-}
-
-; CHECK-LABEL: @test6(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test6(i64 %a, i64 %b) nounwind ssp {
-entry:
- %add = add i64 %b, %a
- %cmp = icmp ult i64 %add, %b
- %Q = select i1 %cmp, i64 %b, i64 42
- ret i64 %Q
-}
-
-; CHECK-LABEL: @test7(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test7(i64 %a, i64 %b) nounwind ssp {
-entry:
- %add = add i64 %b, %a
- %cmp = icmp ugt i64 %b, %add
- %Q = select i1 %cmp, i64 %b, i64 42
- ret i64 %Q
-}
-
; CHECK-LABEL: @test8(
; PR11438
; This is @test1, but the operands are not sign-extended. Make sure
diff --git a/test/Transforms/InstCombine/pr2996.ll b/test/Transforms/InstCombine/pr2996.ll
index ff3245d..f5e1df4 100644
--- a/test/Transforms/InstCombine/pr2996.ll
+++ b/test/Transforms/InstCombine/pr2996.ll
@@ -5,7 +5,7 @@ define void @func_53(i16 signext %p_56) nounwind {
entry:
%0 = icmp sgt i16 %p_56, -1 ; <i1> [#uses=1]
%iftmp.0.0 = select i1 %0, i32 -1, i32 0 ; <i32> [#uses=1]
- %1 = call i32 (...)* @func_4(i32 %iftmp.0.0) nounwind ; <i32> [#uses=0]
+ %1 = call i32 (...) @func_4(i32 %iftmp.0.0) nounwind ; <i32> [#uses=0]
ret void
}
diff --git a/test/Transforms/InstCombine/pr8547.ll b/test/Transforms/InstCombine/pr8547.ll
index f6b3374..6d74b40 100644
--- a/test/Transforms/InstCombine/pr8547.ll
+++ b/test/Transforms/InstCombine/pr8547.ll
@@ -21,6 +21,6 @@ for.cond: ; preds = %for.cond, %codeRepl
br i1 %tobool, label %for.cond, label %codeRepl2
codeRepl2: ; preds = %for.cond
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i64 0, i64 0), i32 %conv2) nounwind
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i64 0, i64 0), i32 %conv2) nounwind
ret i32 0
}
diff --git a/test/Transforms/InstCombine/printf-1.ll b/test/Transforms/InstCombine/printf-1.ll
index 3fe79ac..75e11ce 100644
--- a/test/Transforms/InstCombine/printf-1.ll
+++ b/test/Transforms/InstCombine/printf-1.ll
@@ -22,7 +22,7 @@ declare i32 @printf(i8*, ...)
define void @test_simplify1() {
; CHECK-LABEL: @test_simplify1(
%fmt = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
- call i32 (i8*, ...)* @printf(i8* %fmt)
+ call i32 (i8*, ...) @printf(i8* %fmt)
ret void
; CHECK-NEXT: ret void
}
@@ -32,7 +32,7 @@ define void @test_simplify1() {
define void @test_simplify2() {
; CHECK-LABEL: @test_simplify2(
%fmt = getelementptr [2 x i8], [2 x i8]* @h, i32 0, i32 0
- call i32 (i8*, ...)* @printf(i8* %fmt)
+ call i32 (i8*, ...) @printf(i8* %fmt)
; CHECK-NEXT: call i32 @putchar(i32 104)
ret void
; CHECK-NEXT: ret void
@@ -41,7 +41,7 @@ define void @test_simplify2() {
define void @test_simplify3() {
; CHECK-LABEL: @test_simplify3(
%fmt = getelementptr [2 x i8], [2 x i8]* @percent, i32 0, i32 0
- call i32 (i8*, ...)* @printf(i8* %fmt)
+ call i32 (i8*, ...) @printf(i8* %fmt)
; CHECK-NEXT: call i32 @putchar(i32 37)
ret void
; CHECK-NEXT: ret void
@@ -52,7 +52,7 @@ define void @test_simplify3() {
define void @test_simplify4() {
; CHECK-LABEL: @test_simplify4(
%fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
- call i32 (i8*, ...)* @printf(i8* %fmt)
+ call i32 (i8*, ...) @printf(i8* %fmt)
; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([12 x i8], [12 x i8]* [[STR]], i32 0, i32 0))
ret void
; CHECK-NEXT: ret void
@@ -63,7 +63,7 @@ define void @test_simplify4() {
define void @test_simplify5() {
; CHECK-LABEL: @test_simplify5(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_c, i32 0, i32 0
- call i32 (i8*, ...)* @printf(i8* %fmt, i8 104)
+ call i32 (i8*, ...) @printf(i8* %fmt, i8 104)
; CHECK-NEXT: call i32 @putchar(i32 104)
ret void
; CHECK-NEXT: ret void
@@ -75,7 +75,7 @@ define void @test_simplify6() {
; CHECK-LABEL: @test_simplify6(
%fmt = getelementptr [4 x i8], [4 x i8]* @percent_s, i32 0, i32 0
%str = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
- call i32 (i8*, ...)* @printf(i8* %fmt, i8* %str)
+ call i32 (i8*, ...) @printf(i8* %fmt, i8* %str)
; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0))
ret void
; CHECK-NEXT: ret void
@@ -86,8 +86,8 @@ define void @test_simplify6() {
define void @test_simplify7() {
; CHECK-IPRINTF-LABEL: @test_simplify7(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_d, i32 0, i32 0
- call i32 (i8*, ...)* @printf(i8* %fmt, i32 187)
-; CHECK-IPRINTF-NEXT: call i32 (i8*, ...)* @iprintf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+ call i32 (i8*, ...) @printf(i8* %fmt, i32 187)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, ...) @iprintf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_d, i32 0, i32 0), i32 187)
ret void
; CHECK-IPRINTF-NEXT: ret void
}
@@ -95,16 +95,16 @@ define void @test_simplify7() {
define void @test_no_simplify1() {
; CHECK-IPRINTF-LABEL: @test_no_simplify1(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_f, i32 0, i32 0
- call i32 (i8*, ...)* @printf(i8* %fmt, double 1.87)
-; CHECK-IPRINTF-NEXT: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+ call i32 (i8*, ...) @printf(i8* %fmt, double 1.87)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
ret void
; CHECK-IPRINTF-NEXT: ret void
}
define void @test_no_simplify2(i8* %fmt, double %d) {
; CHECK-LABEL: @test_no_simplify2(
- call i32 (i8*, ...)* @printf(i8* %fmt, double %d)
-; CHECK-NEXT: call i32 (i8*, ...)* @printf(i8* %fmt, double %d)
+ call i32 (i8*, ...) @printf(i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (i8*, ...) @printf(i8* %fmt, double %d)
ret void
; CHECK-NEXT: ret void
}
@@ -112,8 +112,8 @@ define void @test_no_simplify2(i8* %fmt, double %d) {
define i32 @test_no_simplify3() {
; CHECK-LABEL: @test_no_simplify3(
%fmt = getelementptr [2 x i8], [2 x i8]* @h, i32 0, i32 0
- %ret = call i32 (i8*, ...)* @printf(i8* %fmt)
-; CHECK-NEXT: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @h, i32 0, i32 0))
+ %ret = call i32 (i8*, ...) @printf(i8* %fmt)
+; CHECK-NEXT: call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @h, i32 0, i32 0))
ret i32 %ret
; CHECK-NEXT: ret i32 %ret
}
diff --git a/test/Transforms/InstCombine/printf-2.ll b/test/Transforms/InstCombine/printf-2.ll
index d685824..d676985 100644
--- a/test/Transforms/InstCombine/printf-2.ll
+++ b/test/Transforms/InstCombine/printf-2.ll
@@ -15,7 +15,7 @@ declare void @printf(i8*, ...)
define void @test_simplify1() {
; CHECK-LABEL: @test_simplify1(
%fmt = getelementptr [2 x i8], [2 x i8]* @h, i32 0, i32 0
- call void (i8*, ...)* @printf(i8* %fmt)
+ call void (i8*, ...) @printf(i8* %fmt)
; CHECK-NEXT: call i32 @putchar(i32 104)
ret void
; CHECK-NEXT: ret void
@@ -24,7 +24,7 @@ define void @test_simplify1() {
define void @test_simplify2() {
; CHECK-LABEL: @test_simplify2(
%fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
- call void (i8*, ...)* @printf(i8* %fmt)
+ call void (i8*, ...) @printf(i8* %fmt)
; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str, i32 0, i32 0))
ret void
; CHECK-NEXT: ret void
@@ -34,7 +34,7 @@ define void @test_simplify6() {
; CHECK-LABEL: @test_simplify6(
%fmt = getelementptr [4 x i8], [4 x i8]* @percent_s, i32 0, i32 0
%str = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
- call void (i8*, ...)* @printf(i8* %fmt, i8* %str)
+ call void (i8*, ...) @printf(i8* %fmt, i8* %str)
; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0))
ret void
; CHECK-NEXT: ret void
diff --git a/test/Transforms/InstCombine/select-crash-noverify.ll b/test/Transforms/InstCombine/select-crash-noverify.ll
new file mode 100644
index 0000000..4a366aa
--- /dev/null
+++ b/test/Transforms/InstCombine/select-crash-noverify.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -disable-verify -instcombine -S | opt -S | FileCheck %s
+; Formerly crashed, PR8490.
+
+; CHECK-LABEL: @test3(
+define i32 @test3(i1 %bool, i32 %a) {
+entry:
+ %cond = or i1 %bool, true
+ br i1 %cond, label %return, label %xpto
+
+; technically reachable, but this malformed IR may appear as a result of constant propagation
+xpto:
+ %select = select i1 %bool, i32 %a, i32 %select
+ %select2 = select i1 %bool, i32 %select2, i32 %a
+ %sum = add i32 %select, %select2
+ ret i32 %sum
+
+return:
+ ret i32 7
+}
diff --git a/test/Transforms/InstCombine/select-crash.ll b/test/Transforms/InstCombine/select-crash.ll
index 77446cd..991635b 100644
--- a/test/Transforms/InstCombine/select-crash.ll
+++ b/test/Transforms/InstCombine/select-crash.ll
@@ -30,20 +30,3 @@ define <4 x float> @foo(i1 %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
%sel = select i1 %b, <4 x float> %a, <4 x float> %sub
ret <4 x float> %sel
}
-
-; CHECK-LABEL: @test3(
-define i32 @test3(i1 %bool, i32 %a) {
-entry:
- %cond = or i1 %bool, true
- br i1 %cond, label %return, label %xpto
-
-; technically reachable, but this malformed IR may appear as a result of constant propagation
-xpto:
- %select = select i1 %bool, i32 %a, i32 %select
- %select2 = select i1 %bool, i32 %select2, i32 %a
- %sum = add i32 %select, %select2
- ret i32 %sum
-
-return:
- ret i32 7
-}
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index e4cc6f5..ef122c9 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -423,11 +423,11 @@ jump:
%c = or i1 false, false
br label %ret
ret:
- %a = phi i1 [true, %jump], [%c, %entry]
- %b = select i1 %a, i32 10, i32 20
+ %a = phi i1 [true, %entry], [%c, %jump]
+ %b = select i1 %a, i32 20, i32 10
ret i32 %b
; CHECK-LABEL: @test26(
-; CHECK: %a = phi i32 [ 10, %jump ], [ 20, %entry ]
+; CHECK: %a = phi i32 [ 20, %entry ], [ 10, %jump ]
; CHECK-NEXT: ret i32 %a
}
diff --git a/test/Transforms/InstCombine/simplify-libcalls.ll b/test/Transforms/InstCombine/simplify-libcalls.ll
index c569cdd..7cc6710 100644
--- a/test/Transforms/InstCombine/simplify-libcalls.ll
+++ b/test/Transforms/InstCombine/simplify-libcalls.ll
@@ -6,7 +6,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
declare i32 @sprintf(i8*, i8*, ...)
define void @foo(i8* %P, i32* %X) {
- call i32 (i8*, i8*, ...)* @sprintf( i8* %P, i8* getelementptr ([3 x i8], [3 x i8]* @G, i32 0, i32 0), i32* %X ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, i8*, ...) @sprintf( i8* %P, i8* getelementptr ([3 x i8], [3 x i8]* @G, i32 0, i32 0), i32* %X ) ; <i32>:1 [#uses=0]
ret void
}
diff --git a/test/Transforms/InstCombine/sprintf-1.ll b/test/Transforms/InstCombine/sprintf-1.ll
index ec188c6..ddf2f2f 100644
--- a/test/Transforms/InstCombine/sprintf-1.ll
+++ b/test/Transforms/InstCombine/sprintf-1.ll
@@ -21,7 +21,7 @@ declare i32 @sprintf(i8*, i8*, ...)
define void @test_simplify1(i8* %dst) {
; CHECK-LABEL: @test_simplify1(
%fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
- call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt)
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 13, i32 1, i1 false)
ret void
; CHECK-NEXT: ret void
@@ -30,7 +30,7 @@ define void @test_simplify1(i8* %dst) {
define void @test_simplify2(i8* %dst) {
; CHECK-LABEL: @test_simplify2(
%fmt = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
- call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt)
; CHECK-NEXT: store i8 0, i8* %dst, align 1
ret void
; CHECK-NEXT: ret void
@@ -39,7 +39,7 @@ define void @test_simplify2(i8* %dst) {
define void @test_simplify3(i8* %dst) {
; CHECK-LABEL: @test_simplify3(
%fmt = getelementptr [7 x i8], [7 x i8]* @null_hello, i32 0, i32 0
- call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt)
; CHECK-NEXT: store i8 0, i8* %dst, align 1
ret void
; CHECK-NEXT: ret void
@@ -50,7 +50,7 @@ define void @test_simplify3(i8* %dst) {
define void @test_simplify4(i8* %dst) {
; CHECK-LABEL: @test_simplify4(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_c, i32 0, i32 0
- call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i8 104)
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, i8 104)
; CHECK-NEXT: store i8 104, i8* %dst, align 1
; CHECK-NEXT: [[NUL:%[a-z0-9]+]] = getelementptr i8, i8* %dst, i32 1
; CHECK-NEXT: store i8 0, i8* [[NUL]], align 1
@@ -63,7 +63,7 @@ define void @test_simplify4(i8* %dst) {
define void @test_simplify5(i8* %dst, i8* %str) {
; CHECK-LABEL: @test_simplify5(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_s, i32 0, i32 0
- call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i8* %str)
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, i8* %str)
; CHECK-NEXT: [[STRLEN:%[a-z0-9]+]] = call i32 @strlen(i8* %str)
; CHECK-NEXT: [[LENINC:%[a-z0-9]+]] = add i32 [[STRLEN]], 1
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %str, i32 [[LENINC]], i32 1, i1 false)
@@ -76,8 +76,8 @@ define void @test_simplify5(i8* %dst, i8* %str) {
define void @test_simplify6(i8* %dst) {
; CHECK-IPRINTF-LABEL: @test_simplify6(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_d, i32 0, i32 0
- call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i32 187)
-; CHECK-IPRINTF-NEXT: call i32 (i8*, i8*, ...)* @siprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, i32 187)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, i8*, ...) @siprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_d, i32 0, i32 0), i32 187)
ret void
; CHECK-IPRINTF-NEXT: ret void
}
@@ -85,16 +85,16 @@ define void @test_simplify6(i8* %dst) {
define void @test_no_simplify1(i8* %dst) {
; CHECK-IPRINTF-LABEL: @test_no_simplify1(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_f, i32 0, i32 0
- call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double 1.87)
-; CHECK-IPRINTF-NEXT: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, double 1.87)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
ret void
; CHECK-IPRINTF-NEXT: ret void
}
define void @test_no_simplify2(i8* %dst, i8* %fmt, double %d) {
; CHECK-LABEL: @test_no_simplify2(
- call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double %d)
-; CHECK-NEXT: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double %d)
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, double %d)
ret void
; CHECK-NEXT: ret void
}
diff --git a/test/Transforms/InstCombine/sqrt.ll b/test/Transforms/InstCombine/sqrt.ll
index 24c2e00..bf44e4f 100644
--- a/test/Transforms/InstCombine/sqrt.ll
+++ b/test/Transforms/InstCombine/sqrt.ll
@@ -44,7 +44,7 @@ entry:
%add19 = fadd float undef, %mul18
%conv = fpext float %add19 to double
%call34 = call double @sqrt(double %conv) readnone
- %call36 = call i32 (double)* @foo(double %call34) nounwind
+ %call36 = call i32 (double) @foo(double %call34) nounwind
%conv38 = fptrunc double %call34 to float
ret float %conv38
}
diff --git a/test/Transforms/InstCombine/srem1.ll b/test/Transforms/InstCombine/srem1.ll
index f18690c..31452d8 100644
--- a/test/Transforms/InstCombine/srem1.ll
+++ b/test/Transforms/InstCombine/srem1.ll
@@ -5,7 +5,7 @@
define i32 @func_56(i32 %p_58, i32 %p_59, i32 %p_61, i16 signext %p_62) nounwind {
entry:
- %call = call i32 (...)* @rshift_s_s( i32 %p_61, i32 1 ) ; <i32> [#uses=1]
+ %call = call i32 (...) @rshift_s_s( i32 %p_61, i32 1 ) ; <i32> [#uses=1]
%conv = sext i32 %call to i64 ; <i64> [#uses=1]
%or = or i64 -1734012817166602727, %conv ; <i64> [#uses=1]
%rem = srem i64 %or, 1 ; <i64> [#uses=1]
diff --git a/test/Transforms/InstCombine/statepoint.ll b/test/Transforms/InstCombine/statepoint.ll
index bee219d..ec99117 100644
--- a/test/Transforms/InstCombine/statepoint.ll
+++ b/test/Transforms/InstCombine/statepoint.ll
@@ -5,9 +5,9 @@
declare void @func()
-define i1 @test_negative(i32 addrspace(1)* %p) {
+define i1 @test_negative(i32 addrspace(1)* %p) gc "statepoint-example" {
entry:
- %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
+ %safepoint_token = tail call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
%pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
%cmp = icmp eq i32 addrspace(1)* %pnew, null
ret i1 %cmp
@@ -16,9 +16,9 @@ entry:
; CHECK: ret i1 %cmp
}
-define i1 @test_nonnull(i32 addrspace(1)* nonnull %p) {
+define i1 @test_nonnull(i32 addrspace(1)* nonnull %p) gc "statepoint-example" {
entry:
- %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
+ %safepoint_token = tail call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
%pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
%cmp = icmp eq i32 addrspace(1)* %pnew, null
ret i1 %cmp
@@ -26,9 +26,9 @@ entry:
; CHECK: ret i1 false
}
-define i1 @test_null() {
+define i1 @test_null() gc "statepoint-example" {
entry:
- %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* null)
+ %safepoint_token = tail call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* null)
%pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
%cmp = icmp eq i32 addrspace(1)* %pnew, null
ret i1 %cmp
@@ -37,9 +37,9 @@ entry:
; CHECK: ret i1 true
}
-define i1 @test_undef() {
+define i1 @test_undef() gc "statepoint-example" {
entry:
- %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* undef)
+ %safepoint_token = tail call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* undef)
%pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
%cmp = icmp eq i32 addrspace(1)* %pnew, null
ret i1 %cmp
diff --git a/test/Transforms/InstCombine/urem-simplify-bug.ll b/test/Transforms/InstCombine/urem-simplify-bug.ll
index 6dd27e9..1220dfd 100644
--- a/test/Transforms/InstCombine/urem-simplify-bug.ll
+++ b/test/Transforms/InstCombine/urem-simplify-bug.ll
@@ -23,7 +23,7 @@ bb14: ; preds = %entry
bb15: ; preds = %bb14, %bb
%iftmp.0.0 = phi i8* [ getelementptr ([5 x i8], [5 x i8]* @.str1, i32 0, i32 0), %bb14 ], [ getelementptr ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), %bb ] ; <i8*> [#uses=1]
- %tmp17 = call i32 (i8*, ...)* @printf( i8* %iftmp.0.0 ) nounwind ; <i32> [#uses=0]
+ %tmp17 = call i32 (i8*, ...) @printf( i8* %iftmp.0.0 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 8a8b834..4245c7a 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -350,19 +350,19 @@ define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
ret <8 x float> %a
}
-declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i32>)
+declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
; CHECK-LABEL: @test_vpermilvar_pd(
; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
- %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i32> <i32 2, i32 0>)
+ %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 2, i64 0>)
ret <2 x double> %a
}
-declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i32>)
+declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)
define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
; CHECK-LABEL: @test_vpermilvar_pd_256(
; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> <i32 3, i32 1, i32 2, i32 0>)
+ %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 3, i64 1, i64 2, i64 0>)
ret <4 x double> %a
}
@@ -383,14 +383,14 @@ define <8 x float> @test_vpermilvar_ps_256_zero(<8 x float> %v) {
define <2 x double> @test_vpermilvar_pd_zero(<2 x double> %v) {
; CHECK-LABEL: @test_vpermilvar_pd_zero(
; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
- %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i32> zeroinitializer)
+ %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> zeroinitializer)
ret <2 x double> %a
}
define <4 x double> @test_vpermilvar_pd_256_zero(<4 x double> %v) {
; CHECK-LABEL: @test_vpermilvar_pd_256_zero(
; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
- %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> zeroinitializer)
+ %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> zeroinitializer)
ret <4 x double> %a
}
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 164e315..d4d7f16 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -414,3 +414,11 @@ define <4 x i32> @pr20114(<4 x i32> %__mask) {
%masked_new.i.i.i = and <4 x i32> bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64)> to <4 x i32>), %mask01.i
ret <4 x i32> %masked_new.i.i.i
}
+
+define <2 x i32*> @pr23113(<4 x i32*> %A) {
+; CHECK-LABEL: @pr23113
+; CHECK: %[[V:.*]] = shufflevector <4 x i32*> %A, <4 x i32*> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: ret <2 x i32*> %[[V]]
+ %1 = shufflevector <4 x i32*> %A, <4 x i32*> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32*> %1
+}
diff --git a/test/Transforms/InstCombine/x86-insertps.ll b/test/Transforms/InstCombine/x86-insertps.ll
new file mode 100644
index 0000000..487c727
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-insertps.ll
@@ -0,0 +1,117 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
+
+define <4 x float> @insertps_non_const_imm(<4 x float> %v1, <4 x float> %v2, i8 %c) {
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
+ ret <4 x float> %res
+
+; CHECK-LABEL: @insertps_non_const_imm
+; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
+; CHECK-NEXT: ret <4 x float>
+}
+
+; If all zero mask bits are set, return a zero regardless of the other control bits.
+
+define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
+ ret <4 x float> %res
+
+; CHECK-LABEL: @insertps_0x0f
+; CHECK-NEXT: ret <4 x float> zeroinitializer
+}
+define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
+ ret <4 x float> %res
+
+; CHECK-LABEL: @insertps_0xff
+; CHECK-NEXT: ret <4 x float> zeroinitializer
+}
+
+; If some zero mask bits are set, we do not change anything.
+
+define <4 x float> @insertps_0x03(<4 x float> %v1, <4 x float> %v2) {
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 3)
+ ret <4 x float> %res
+
+; CHECK-LABEL: @insertps_0x03
+; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 3)
+; CHECK-NEXT: ret <4 x float>
+}
+
+; If no zero mask bits are set, convert to a shuffle.
+
+define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
+ ret <4 x float> %res
+
+; CHECK-LABEL: @insertps_0x00
+; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float>
+}
+
+define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
+ ret <4 x float> %res
+
+; CHECK-LABEL: @insertps_0x10
+; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float>
+}
+
+define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
+ ret <4 x float> %res
+
+; CHECK-LABEL: @insertps_0x20
+; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT: ret <4 x float>
+}
+
+define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
+ ret <4 x float> %res
+
+; CHECK-LABEL: @insertps_0x30
+; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: ret <4 x float>
+}
+
+define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
+ ret <4 x float> %res
+
+; CHECK-LABEL: @insertps_0xc0
+; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float>
+}
+
+define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
+ ret <4 x float> %res
+
+; CHECK-LABEL: @insertps_0xd0
+; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float>
+}
+
+define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
+ ret <4 x float> %res
+
+; CHECK-LABEL: @insertps_0xe0
+; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
+; CHECK-NEXT: ret <4 x float>
+}
+
+define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
+ ret <4 x float> %res
+
+; CHECK-LABEL: @insertps_0xf0
+; CHECK-NEXT: shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x float>
+}
+
diff --git a/test/Transforms/InstSimplify/undef.ll b/test/Transforms/InstSimplify/undef.ll
index e8b49b6..f1f0b03 100644
--- a/test/Transforms/InstSimplify/undef.ll
+++ b/test/Transforms/InstSimplify/undef.ll
@@ -150,7 +150,7 @@ define i64 @test17(i64 %a) {
; @test18
; CHECK: ret i64 undef
define i64 @test18(i64 %a) {
- %r = call i64 (i64)* undef(i64 %a)
+ %r = call i64 (i64) undef(i64 %a)
ret i64 %r
}
diff --git a/test/Transforms/JumpThreading/2010-08-26-and.ll b/test/Transforms/JumpThreading/2010-08-26-and.ll
index c0a6b47..cc56ac9 100644
--- a/test/Transforms/JumpThreading/2010-08-26-and.ll
+++ b/test/Transforms/JumpThreading/2010-08-26-and.ll
@@ -147,7 +147,7 @@ _ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134: ; preds = %if.the
%tmp7.i138 = and i8 %tmp2.i137, 1 ; <i8> [#uses=1]
%tobool.i139 = icmp eq i8 %tmp7.i138, 0 ; <i1> [#uses=1]
%retval.0.i = select i1 %tobool.i139, i32 0, i32 %retval.0.i.pre ; <i32> [#uses=1]
- %call22 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str7, i64 0, i64 0), i32 %retval.0.i) ; <i32> [#uses=0]
+ %call22 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str7, i64 0, i64 0), i32 %retval.0.i) ; <i32> [#uses=0]
%exitcond = icmp eq i64 %tmp146, %tmp145 ; <i1> [#uses=1]
br i1 %exitcond, label %for.end, label %land.lhs.true.i
diff --git a/test/Transforms/JumpThreading/assume.ll b/test/Transforms/JumpThreading/assume.ll
index 89dd0a9..53010b7 100644
--- a/test/Transforms/JumpThreading/assume.ll
+++ b/test/Transforms/JumpThreading/assume.ll
@@ -21,11 +21,11 @@ if.then: ; preds = %entry
br i1 %cmp2, label %if.then3, label %return
if.then3: ; preds = %if.then
- tail call void (...)* @bar() #1
+ tail call void (...) @bar() #1
br label %return
if.else: ; preds = %entry
- tail call void (...)* @car() #1
+ tail call void (...) @car() #1
br label %return
return: ; preds = %if.else, %if.then, %if.then3
@@ -43,12 +43,12 @@ entry:
; CHECK-LABEL: @test2
; CHECK: icmp sgt i32 %a, 5
; CHECK: tail call void @llvm.assume
-; CHECK: tail call void (...)* @bar()
+; CHECK: tail call void (...) @bar()
; CHECK: ret i32 1
if.then: ; preds = %entry
- tail call void (...)* @bar() #1
+ tail call void (...) @bar() #1
br label %return
return: ; preds = %entry, %if.then
diff --git a/test/Transforms/JumpThreading/indirectbr.ll b/test/Transforms/JumpThreading/indirectbr.ll
index 59f393a..197ca30 100644
--- a/test/Transforms/JumpThreading/indirectbr.ll
+++ b/test/Transforms/JumpThreading/indirectbr.ll
@@ -79,15 +79,15 @@ entry:
br label %__here
__here: ; preds = %entry
- %call = call i32 (...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i64 ptrtoint (i8* blockaddress(@test3, %__here) to i64)) nounwind noredzone
+ %call = call i32 (...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i64 ptrtoint (i8* blockaddress(@test3, %__here) to i64)) nounwind noredzone
br label %__here1
__here1: ; preds = %__here
- %call2 = call i32 (...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i64 ptrtoint (i8* blockaddress(@test3, %__here1) to i64)) nounwind noredzone
+ %call2 = call i32 (...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i64 ptrtoint (i8* blockaddress(@test3, %__here1) to i64)) nounwind noredzone
br label %__here3
__here3: ; preds = %__here1
- %call4 = call i32 (...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i64 ptrtoint (i8* blockaddress(@test3, %__here3) to i64)) nounwind noredzone
+ %call4 = call i32 (...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i64 ptrtoint (i8* blockaddress(@test3, %__here3) to i64)) nounwind noredzone
ret void
}
diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll
index 008eac7..f76c1ec 100644
--- a/test/Transforms/JumpThreading/thread-loads.ll
+++ b/test/Transforms/JumpThreading/thread-loads.ll
@@ -8,7 +8,7 @@ target triple = "i386-apple-darwin7"
define i32 @test1(i32* %P) nounwind {
; CHECK-LABEL: @test1(
entry:
- %0 = tail call i32 (...)* @f1() nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @f1() nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %bb1, label %bb
@@ -26,7 +26,7 @@ bb1: ; preds = %entry, %bb
br i1 %3, label %bb3, label %bb2
bb2: ; preds = %bb1
- %4 = tail call i32 (...)* @f2() nounwind ; <i32> [#uses=0]
+ %4 = tail call i32 (...) @f2() nounwind ; <i32> [#uses=0]
ret i32 %res.0
bb3: ; preds = %bb1
@@ -47,7 +47,7 @@ declare i32 @f2(...)
define i32 @test2(i32* %P) nounwind {
; CHECK-LABEL: @test2(
entry:
- %0 = tail call i32 (...)* @f1() nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @f1() nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %bb1, label %bb
@@ -65,7 +65,7 @@ bb1: ; preds = %entry, %bb
br i1 %3, label %bb3, label %bb2
bb2: ; preds = %bb1
- %4 = tail call i32 (...)* @f2() nounwind
+ %4 = tail call i32 (...) @f2() nounwind
ret i32 %res.0
bb3: ; preds = %bb1
diff --git a/test/Transforms/LICM/2011-04-09-RAUW-AST.ll b/test/Transforms/LICM/2011-04-09-RAUW-AST.ll
index f5ef29c..bf069c2 100644
--- a/test/Transforms/LICM/2011-04-09-RAUW-AST.ll
+++ b/test/Transforms/LICM/2011-04-09-RAUW-AST.ll
@@ -41,7 +41,7 @@ for.inc10: ; preds = %for.cond4
for.end13: ; preds = %for.cond
%tmp14 = load i32, i32* @g_3, align 4
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %tmp14) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %tmp14) nounwind
ret i32 0
}
diff --git a/test/Transforms/LoopIdiom/debug-line.ll b/test/Transforms/LoopIdiom/debug-line.ll
index 84bfafd..1bedf7b 100644
--- a/test/Transforms/LoopIdiom/debug-line.ll
+++ b/test/Transforms/LoopIdiom/debug-line.ll
@@ -39,7 +39,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!6 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !2, baseType: !7)
!7 = !MDBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!8 = !MDLocation(line: 2, column: 18, scope: !0)
-!9 = !{i32 0}
+!9 = !{}
!10 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3, scope: !11, file: !1, type: !13)
!11 = distinct !MDLexicalBlock(line: 3, column: 3, file: !18, scope: !12)
!12 = distinct !MDLexicalBlock(line: 2, column: 21, file: !18, scope: !0)
diff --git a/test/Transforms/LoopInterchange/interchange.ll b/test/Transforms/LoopInterchange/interchange.ll
index 30a4a71..3994166 100644
--- a/test/Transforms/LoopInterchange/interchange.ll
+++ b/test/Transforms/LoopInterchange/interchange.ll
@@ -376,7 +376,7 @@ for.body.lr.ph:
for.body:
%indvars.iv24 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next25, %for.inc10 ]
- tail call void (...)* @foo()
+ tail call void (...) @foo()
br label %for.body3
for.body3:
diff --git a/test/Transforms/LoopRotate/PhiRename-1.ll b/test/Transforms/LoopRotate/PhiRename-1.ll
index 8ec0fbf..6d75888 100644
--- a/test/Transforms/LoopRotate/PhiRename-1.ll
+++ b/test/Transforms/LoopRotate/PhiRename-1.ll
@@ -60,7 +60,7 @@ cond_true: ; preds = %bb
%tmp12 = load i32, i32* %tmp11 ; <i32> [#uses=1]
%tmp13 = load %struct.FILE*, %struct.FILE** @outfile ; <%struct.FILE*> [#uses=1]
%tmp14 = getelementptr [11 x i8], [11 x i8]* @str1, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp15 = call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %tmp13, i8* %tmp14, i32 %tmp12 ) ; <i32> [#uses=0]
+ %tmp15 = call i32 (%struct.FILE*, i8*, ...) @fprintf( %struct.FILE* %tmp13, i8* %tmp14, i32 %tmp12 ) ; <i32> [#uses=0]
%tmp16 = load i32, i32* %c ; <i32> [#uses=1]
%tmp17 = add i32 %tmp16, 1 ; <i32> [#uses=1]
store i32 %tmp17, i32* %c
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index 075cdf9..88348b0 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -72,7 +72,7 @@ for.body:
for.inc:
%dec = add i64 %i.0, -1
- tail call void @llvm.dbg.value(metadata i64 %dec, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !0), metadata !MDExpression())
+ tail call void @llvm.dbg.value(metadata i64 %dec, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !0), metadata !MDExpression()), !dbg !MDLocation(scope: !0)
br label %for.cond
for.end:
diff --git a/test/Transforms/LoopSimplify/dup-preds.ll b/test/Transforms/LoopSimplify/dup-preds.ll
index 3d1f149..c9253fa 100644
--- a/test/Transforms/LoopSimplify/dup-preds.ll
+++ b/test/Transforms/LoopSimplify/dup-preds.ll
@@ -28,7 +28,7 @@ for.body305: ; preds = %for.body305, %if.th
br label %for.body305
for.body344: ; preds = %for.body344, %for.body276.lr.ph, %for.body276.lr.ph
- %indvar = phi i64 [ %indvar.next, %for.body344 ], [ 0, %for.body276.lr.ph ]
+ %indvar = phi i64 [ %indvar.next, %for.body344 ], [ 0, %for.body276.lr.ph ], [ 0, %for.body276.lr.ph ]
%indvars.iv552 = phi i64 [ %indvars.iv.next553, %for.body344 ], [ 0, %for.body276.lr.ph ], [ 0, %for.body276.lr.ph ]
%indvars.iv.next553 = add nuw nsw i64 %indvars.iv552, 1
%indvar.next = add i64 %indvar, 1
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll b/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
index d1454cf..394969c 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
+++ b/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
@@ -28,7 +28,7 @@ entry:
tail call void @func_1( ) nounwind
load volatile i16, i16* @g_3, align 2 ; <i16>:0 [#uses=1]
zext i16 %0 to i32 ; <i32>:1 [#uses=1]
- tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), i32 %1 ) nounwind ; <i32>:2 [#uses=0]
+ tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), i32 %1 ) nounwind ; <i32>:2 [#uses=0]
ret i32 0
}
diff --git a/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll b/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll
index 481569c..5904434 100644
--- a/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll
+++ b/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll
@@ -45,7 +45,7 @@ define i32 @main() nounwind {
entry:
tail call void @func_1( ) nounwind
load i32, i32* @g_19, align 4 ; <i32>:0 [#uses=1]
- tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), i32 %0 ) nounwind ; <i32>:1 [#uses=0]
+ tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), i32 %0 ) nounwind ; <i32>:1 [#uses=0]
ret i32 0
}
diff --git a/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll b/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll
index 8304f76..c287b10 100644
--- a/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll
+++ b/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll
@@ -34,6 +34,6 @@ nactive_heaps.exit: ; preds = %bb2.i3, %bb1.i
%3 = load i32, i32* @heap_size, align 4 ; <i32> [#uses=1]
%4 = mul i32 %3, %m.0.i ; <i32> [#uses=1]
%5 = sub i32 %4, 0 ; <i32> [#uses=1]
- %6 = tail call i32 (i8*, i8*, ...)* @sprintf(i8* null, i8* getelementptr ([39 x i8], [39 x i8]* @"\01LC85", i32 0, i32 0), i32 %m.0.i, i32 0, i32 %5, i32 0) nounwind ; <i32> [#uses=0]
+ %6 = tail call i32 (i8*, i8*, ...) @sprintf(i8* null, i8* getelementptr ([39 x i8], [39 x i8]* @"\01LC85", i32 0, i32 0), i32 %m.0.i, i32 0, i32 %5, i32 0) nounwind ; <i32> [#uses=0]
ret %struct.obj* null
}
diff --git a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
index dba97f5..aaac868 100644
--- a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
+++ b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
@@ -47,8 +47,8 @@ bb:
br i1 %tmp4, label %bb6, label %bb5
bb5: ; preds = %bb
- tail call void (...)* @snork(i8* getelementptr inbounds ([52 x i8], [52 x i8]* @global1, i64 0, i64 0), i32 2021) nounwind
- tail call void (...)* @snork(i8* getelementptr inbounds (%struct.jim, %struct.jim* @global3, i64 0, i32 3, i64 1), i32 -2146631418) nounwind
+ tail call void (...) @snork(i8* getelementptr inbounds ([52 x i8], [52 x i8]* @global1, i64 0, i64 0), i32 2021) nounwind
+ tail call void (...) @snork(i8* getelementptr inbounds (%struct.jim, %struct.jim* @global3, i64 0, i32 3, i64 1), i32 -2146631418) nounwind
unreachable
bb6: ; preds = %bb
@@ -95,8 +95,8 @@ bb30: ; preds = %bb22
br i1 %tmp31, label %bb33, label %bb32
bb32: ; preds = %bb30, %bb26, %bb17
- tail call void (...)* @snork(i8* getelementptr inbounds ([52 x i8], [52 x i8]* @global1, i64 0, i64 0), i32 2038) nounwind
- tail call void (...)* @snork(i8* %tmp11, i32 -2146631418) nounwind
+ tail call void (...) @snork(i8* getelementptr inbounds ([52 x i8], [52 x i8]* @global1, i64 0, i64 0), i32 2038) nounwind
+ tail call void (...) @snork(i8* %tmp11, i32 -2146631418) nounwind
unreachable
bb33: ; preds = %bb30
@@ -141,8 +141,8 @@ bb55: ; preds = %bb48
br i1 %tmp57, label %bb59, label %bb58
bb58: ; preds = %bb55, %bb52, %bb43
- tail call void (...)* @snork(i8* getelementptr inbounds ([52 x i8], [52 x i8]* @global1, i64 0, i64 0), i32 2055) nounwind
- tail call void (...)* @snork(i8* %tmp38, i32 -2146631418) nounwind
+ tail call void (...) @snork(i8* getelementptr inbounds ([52 x i8], [52 x i8]* @global1, i64 0, i64 0), i32 2055) nounwind
+ tail call void (...) @snork(i8* %tmp38, i32 -2146631418) nounwind
br label %bb247
bb59: ; preds = %bb55
@@ -168,7 +168,7 @@ bb68: ; preds = %bb59
]
bb69: ; preds = %bb68
- tail call void (...)* @snork(i8* getelementptr inbounds ([52 x i8], [52 x i8]* @global1, i64 0, i64 0), i32 2071) nounwind
+ tail call void (...) @snork(i8* getelementptr inbounds ([52 x i8], [52 x i8]* @global1, i64 0, i64 0), i32 2071) nounwind
%tmp70 = load i32, i32* getelementptr inbounds (%struct.snork, %struct.snork* @global, i64 0, i32 2), align 4
unreachable
diff --git a/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll b/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll
index ba03597..f862c26 100644
--- a/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll
@@ -54,7 +54,7 @@ return: ; preds = %bb, %entry
; Unable to eliminate cast due to potential overflow.
define void @foobar3() nounwind {
entry:
- tail call i32 (...)* @nn( ) nounwind ; <i32>:0 [#uses=1]
+ tail call i32 (...) @nn( ) nounwind ; <i32>:0 [#uses=1]
icmp eq i32 %0, 0 ; <i1>:1 [#uses=1]
br i1 %1, label %return, label %bb
@@ -64,7 +64,7 @@ bb: ; preds = %bb, %entry
uitofp i32 %i.03 to double ; <double>:2 [#uses=1]
tail call void @foo( double %2 ) nounwind
add i32 %i.03, 1 ; <i32>:3 [#uses=2]
- tail call i32 (...)* @nn( ) nounwind ; <i32>:4 [#uses=1]
+ tail call i32 (...) @nn( ) nounwind ; <i32>:4 [#uses=1]
icmp ugt i32 %4, %3 ; <i1>:5 [#uses=1]
br i1 %5, label %bb, label %return
diff --git a/test/Transforms/LoopStrengthReduce/X86/pr17473.ll b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
index 093cf65..5b7bb88 100644
--- a/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
@@ -55,7 +55,7 @@ for.end: ; preds = %fn3.exit
%conv7 = sext i8 %inc to i32
%add = add nsw i32 %conv7, %conv
store i32 %add, i32* @e, align 4
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %add) #2
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %add) #2
ret i32 0
}
diff --git a/test/Transforms/LoopStrengthReduce/different-type-ivs.ll b/test/Transforms/LoopStrengthReduce/different-type-ivs.ll
index 8cdd264..c24f877 100644
--- a/test/Transforms/LoopStrengthReduce/different-type-ivs.ll
+++ b/test/Transforms/LoopStrengthReduce/different-type-ivs.ll
@@ -11,7 +11,7 @@ no_exit: ; preds = %no_exit, %entry
%X.0.0 = mul i16 %indvar.upgrd.1, 1234 ; <i16> [#uses=1]
%tmp. = mul i32 %indvar, 1234 ; <i32> [#uses=1]
%tmp.5 = sext i16 %X.0.0 to i32 ; <i32> [#uses=1]
- %tmp.3 = call i32 (...)* @bar( i32 %tmp.5, i32 %tmp. ) ; <i32> [#uses=0]
+ %tmp.3 = call i32 (...) @bar( i32 %tmp.5, i32 %tmp. ) ; <i32> [#uses=0]
%tmp.0 = call i1 @pred( ) ; <i1> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
br i1 %tmp.0, label %return, label %no_exit
diff --git a/test/Transforms/LoopStrengthReduce/pr12018.ll b/test/Transforms/LoopStrengthReduce/pr12018.ll
index df969ee..9bdbf4e 100644
--- a/test/Transforms/LoopStrengthReduce/pr12018.ll
+++ b/test/Transforms/LoopStrengthReduce/pr12018.ll
@@ -16,7 +16,7 @@ for.body: ; preds = %_ZN8nsTArray9Elemen
%tmp = bitcast %struct.nsTArrayHeader* %add.ptr.i to %struct.nsTArray*
%arrayidx = getelementptr inbounds %struct.nsTArray, %struct.nsTArray* %tmp, i32 %i.06
%add = add nsw i32 %i.06, 1
- call void @llvm.dbg.value(metadata %struct.nsTArray* %aValues, i64 0, metadata !0, metadata !{}) nounwind
+ call void @llvm.dbg.value(metadata %struct.nsTArray* %aValues, i64 0, metadata !0, metadata !MDExpression()) nounwind, !dbg !MDLocation(scope: !MDSubprogram())
br label %_ZN8nsTArray9ElementAtEi.exit
_ZN8nsTArray9ElementAtEi.exit: ; preds = %for.body
@@ -35,4 +35,4 @@ declare %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev()
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-!0 = !MDLocalVariable(tag: DW_TAG_arg_variable)
+!0 = !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !MDSubprogram())
diff --git a/test/Transforms/LoopStrengthReduce/pr18165.ll b/test/Transforms/LoopStrengthReduce/pr18165.ll
index 42d960f..11c9c4e 100644
--- a/test/Transforms/LoopStrengthReduce/pr18165.ll
+++ b/test/Transforms/LoopStrengthReduce/pr18165.ll
@@ -64,7 +64,7 @@ fn1.exit: ; preds = %lor.end.i
store i32 %add.i, i32* getelementptr inbounds (%struct.anon, %struct.anon* @e, i64 0, i32 1), align 4, !tbaa !8
store i32 0, i32* @h, align 4, !tbaa !7
%3 = load i32, i32* @b, align 4, !tbaa !7
- %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %3) #2
+ %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %3) #2
ret i32 0
}
diff --git a/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll b/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
new file mode 100644
index 0000000..6b1943f
--- /dev/null
+++ b/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -unroll-runtime -loop-unroll < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;; Check that we don't emit expensive instructions to compute trip
+;; counts when unrolling loops.
+
+define i32 @test(i64 %v12, i8* %array, i64* %loc) {
+; CHECK-LABEL: @test(
+; CHECK-NOT: udiv
+entry:
+ %step = load i64, i64* %loc, !range !0
+ br label %loop
+
+loop: ; preds = %entry, %loop
+ %k.015 = phi i64 [ %v15, %loop ], [ %v12, %entry ]
+ %v14 = getelementptr inbounds i8, i8* %array, i64 %k.015
+ store i8 0, i8* %v14
+ %v15 = add nuw nsw i64 %k.015, %step
+ %v16 = icmp slt i64 %v15, 8193
+ br i1 %v16, label %loop, label %loopexit
+
+loopexit: ; preds = %loop
+ ret i32 0
+}
+
+!0 = !{i64 1, i64 100}
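The ;; comment above is the point of this new test: with -unroll-runtime the prologue needs a trip count for a loop whose stride %step is only known at run time, and computing it symbolically costs a division. A minimal sketch of the kind of expansion the CHECK-NOT: udiv line is guarding against (%span and %tc are hypothetical names; %v12 and %step are from the test above):

    %span = sub i64 8193, %v12    ; distance left to cover
    %tc   = udiv i64 %span, %step ; divides by a run-time load -- too expensive

The expectation is that the unroller declines to materialize such a trip count rather than emit the udiv.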
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
index 9e78edf..fea15b6 100644
--- a/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -1,5 +1,7 @@
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
; Tests for unrolling loops with run-time trip counts
; CHECK: %xtraiter = and i32 %n
diff --git a/test/Transforms/LoopUnroll/runtime-loop4.ll b/test/Transforms/LoopUnroll/runtime-loop4.ll
index 9be0ffd..5014c73 100644
--- a/test/Transforms/LoopUnroll/runtime-loop4.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop4.ll
@@ -20,7 +20,8 @@ loop1:
br label %loop2.header
loop2.header:
- br label %loop2
+ %e = icmp uge i32 %iter, 1
+ br i1 %e, label %loop2, label %exit2
loop2:
%iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
diff --git a/test/Transforms/LoopUnroll/runtime-loop5.ll b/test/Transforms/LoopUnroll/runtime-loop5.ll
new file mode 100644
index 0000000..e8d5177
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop5.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
+
+; Given that the trip-count of this loop is a 3-bit value, we cannot
+; safely unroll it with a count of anything more than 8.
+
+define i3 @test(i3* %a, i3 %n) {
+; UNROLL-16-LABEL: @test(
+; UNROLL-4-LABEL: @test(
+entry:
+ %cmp1 = icmp eq i3 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+; UNROLL-16-NOT: for.body.prol:
+; UNROLL-4: for.body.prol:
+
+for.body: ; preds = %for.body, %entry
+; UNROLL-16-LABEL: for.body:
+; UNROLL-4-LABEL: for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.02 = phi i3 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i3, i3* %a, i64 %indvars.iv
+
+; UNROLL-16-LABEL: for.body
+; UNROLL-16-LABEL: getelementptr
+; UNROLL-16-LABEL-NOT: getelementptr
+
+; UNROLL-4-LABEL: getelementptr
+; UNROLL-4-LABEL: getelementptr
+; UNROLL-4-LABEL: getelementptr
+; UNROLL-4-LABEL: getelementptr
+
+ %0 = load i3, i3* %arrayidx
+ %add = add nsw i3 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i3
+ %exitcond = icmp eq i3 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+; UNROLL-16-LABEL: for.end
+; UNROLL-4-LABEL: for.end
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i3 [ 0, %entry ], [ %add, %for.body ]
+ ret i3 %sum.0.lcssa
+}
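For reference, the arithmetic behind the comment above: %n has type i3, so the loop can run at most 2^3 = 8 iterations. A runtime unroll by 16 could therefore never complete one pass through the unrolled body, which is why the UNROLL-16 run is expected to emit no prologue (UNROLL-16-NOT: for.body.prol), while an unroll by 4 still fits within the 3-bit trip count and keeps its prologue under UNROLL-4.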
diff --git a/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll b/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll
index 906c2c5..8ea4ed3 100644
--- a/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll
+++ b/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll
@@ -19,7 +19,7 @@ bb54: ; preds = %bb31, %bb5, %bb
br i1 false, label %bb64, label %bb
bb64: ; preds = %bb54
%tmp6566 = sext i16 %p_6 to i32 ; <i32> [#uses=1]
- %tmp68 = tail call i32 (...)* @func_18( i32 1, i32 %tmp6566, i32 1 ) nounwind ; <i32> [#uses=0]
+ %tmp68 = tail call i32 (...) @func_18( i32 1, i32 %tmp6566, i32 1 ) nounwind ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
index 31dba79..90c0944 100644
--- a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
+++ b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
@@ -15,7 +15,7 @@ bb: ; preds = %bb.nph, %bb3
br i1 %1, label %bb2, label %bb1
bb1: ; preds = %bb
- %2 = tail call i32 (...)* @b() nounwind ; <i32> [#uses=0]
+ %2 = tail call i32 (...) @b() nounwind ; <i32> [#uses=0]
br label %bb2
bb2: ; preds = %bb, %bb1
diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll
index f942ecc..c794ca0 100644
--- a/test/Transforms/LoopVectorize/dbg.value.ll
+++ b/test/Transforms/LoopVectorize/dbg.value.ll
@@ -45,7 +45,7 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!26}
!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang", isOptimized: true, emissionKind: 0, file: !25, enums: !1, retainedTypes: !1, subprograms: !2, globals: !11)
-!1 = !{i32 0}
+!1 = !{}
!2 = !{!3}
!3 = !MDSubprogram(name: "test", linkageName: "test", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !25, scope: !4, type: !5, function: i32 ()* @test, variables: !8)
!4 = !MDFile(filename: "test", directory: "/path/to/somewhere")
diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll
index 97d9da0..b9f98cf 100644
--- a/test/Transforms/LoopVectorize/debugloc.ll
+++ b/test/Transforms/LoopVectorize/debugloc.ll
@@ -65,7 +65,7 @@ attributes #1 = { nounwind readnone }
!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 185038) (llvm/trunk 185097)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !MDFile(filename: "-", directory: "/Volumes/Data/backedup/dev/os/llvm/debug")
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!4}
!4 = !MDSubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !5, scope: !6, type: !7, function: i32 (i32*, i32)* @f, variables: !12)
!5 = !MDFile(filename: "<stdin>", directory: "/Volumes/Data/backedup/dev/os/llvm/debug")
diff --git a/test/Transforms/LowerExpectIntrinsic/basic.ll b/test/Transforms/LowerExpectIntrinsic/basic.ll
index 5d72371..73d9f44 100644
--- a/test/Transforms/LowerExpectIntrinsic/basic.ll
+++ b/test/Transforms/LowerExpectIntrinsic/basic.ll
@@ -18,7 +18,7 @@ entry:
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -50,7 +50,7 @@ entry:
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -81,7 +81,7 @@ entry:
br i1 %tobool1, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -113,7 +113,7 @@ entry:
br i1 %tobool2, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -143,7 +143,7 @@ entry:
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -231,7 +231,7 @@ entry:
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -260,7 +260,7 @@ entry:
br i1 %expval, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
diff --git a/test/Transforms/LowerInvoke/2003-12-10-Crash.ll b/test/Transforms/LowerInvoke/2003-12-10-Crash.ll
index 31f3d42..fca8e86 100644
--- a/test/Transforms/LowerInvoke/2003-12-10-Crash.ll
+++ b/test/Transforms/LowerInvoke/2003-12-10-Crash.ll
@@ -15,8 +15,11 @@ invoke_cont.0: ; preds = %then
to label %try_exit unwind label %try_catch
try_catch: ; preds = %invoke_cont.0, %then
%__tmp.0 = phi i32* [ null, %invoke_cont.0 ], [ null, %then ] ; <i32*> [#uses=0]
+ %res = landingpad { i8* } personality i32 (...)* @__gxx_personality_v0
+ cleanup
ret void
try_exit: ; preds = %invoke_cont.0
ret void
}
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index 2a009ab..7a55b07 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -48,5 +48,5 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!10 = !MDLocation(line: 3, scope: !11)
!11 = distinct !MDLexicalBlock(line: 2, column: 0, file: !12, scope: !1)
!12 = !MDFile(filename: "testfunc.c", directory: "/tmp")
-!13 = !{i32 0}
+!13 = !{}
!14 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index 0d9e3de..e60acc4 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -53,5 +53,5 @@ return: ; preds = %entry
!18 = !MDLocation(line: 5, scope: !10, inlinedAt: !8)
!19 = !MDLocation(line: 10, scope: !1)
!20 = !MDFile(filename: "bar.c", directory: "/tmp/")
-!21 = !{i32 0}
+!21 = !{}
!22 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll
index f54406f..7d7f3a6 100644
--- a/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/test/Transforms/MemCpyOpt/form-memset.ll
@@ -46,7 +46,7 @@ entry:
store i8 %c, i8* %tmp69, align 1
%tmp73 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 18 ; <i8*> [#uses=1]
store i8 %c, i8* %tmp73, align 1
- %tmp76 = call i32 (...)* @bar( [19 x i8]* %x ) nounwind
+ %tmp76 = call i32 (...) @bar( [19 x i8]* %x ) nounwind
ret void
; CHECK-LABEL: @test1(
; CHECK-NOT: store
diff --git a/test/Transforms/NaryReassociate/nary-add.ll b/test/Transforms/NaryReassociate/nary-add.ll
new file mode 100644
index 0000000..39d7c59
--- /dev/null
+++ b/test/Transforms/NaryReassociate/nary-add.ll
@@ -0,0 +1,198 @@
+; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+
+declare void @foo(i32)
+
+; foo(a + c);
+; foo(a + (b + c));
+; =>
+; t = a + c;
+; foo(t);
+; foo(t + b);
+define void @left_reassociate(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @left_reassociate(
+ %1 = add i32 %a, %c
+; CHECK: [[BASE:%[a-zA-Z0-9]+]] = add i32 %a, %c
+ call void @foo(i32 %1)
+ %2 = add i32 %b, %c
+ %3 = add i32 %a, %2
+; CHECK: add i32 [[BASE]], %b
+ call void @foo(i32 %3)
+ ret void
+}
+
+; foo(a + c);
+; foo((a + b) + c);
+; =>
+; t = a + c;
+; foo(t);
+; foo(t + b);
+define void @right_reassociate(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @right_reassociate(
+ %1 = add i32 %a, %c
+; CHECK: [[BASE:%[a-zA-Z0-9]+]] = add i32 %a, %c
+ call void @foo(i32 %1)
+ %2 = add i32 %a, %b
+ %3 = add i32 %2, %c
+; CHECK: add i32 [[BASE]], %b
+ call void @foo(i32 %3)
+ ret void
+}
+
+; t1 = a + c;
+; foo(t1);
+; t2 = a + b;
+; foo(t2);
+; t3 = t2 + c;
+; foo(t3);
+;
+; Do not rewrite t3 into t1 + b because t2 is used elsewhere and is likely free.
+define void @no_reassociate(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @no_reassociate(
+ %1 = add i32 %a, %c
+; CHECK: add i32 %a, %c
+ call void @foo(i32 %1)
+ %2 = add i32 %a, %b
+; CHECK: add i32 %a, %b
+ call void @foo(i32 %2)
+ %3 = add i32 %2, %c
+; CHECK: add i32 %2, %c
+ call void @foo(i32 %3)
+ ret void
+}
+
+; if (p1)
+; foo(a + c);
+; if (p2)
+; foo(a + c);
+; if (p3)
+; foo((a + b) + c);
+;
+; No action because (a + c) does not dominate ((a + b) + c).
+define void @conditional(i1 %p1, i1 %p2, i1 %p3, i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @conditional(
+entry:
+ br i1 %p1, label %then1, label %branch1
+
+then1:
+ %0 = add i32 %a, %c
+; CHECK: add i32 %a, %c
+ call void @foo(i32 %0)
+ br label %branch1
+
+branch1:
+ br i1 %p2, label %then2, label %branch2
+
+then2:
+ %1 = add i32 %a, %c
+; CHECK: add i32 %a, %c
+ call void @foo(i32 %1)
+ br label %branch2
+
+branch2:
+ br i1 %p3, label %then3, label %return
+
+then3:
+ %2 = add i32 %a, %b
+; CHECK: %2 = add i32 %a, %b
+ %3 = add i32 %2, %c
+; CHECK: add i32 %2, %c
+ call void @foo(i32 %3)
+ br label %return
+
+return:
+ ret void
+}
+
+; This test involves more conditional reassociation candidates. It exercises
+; the stack optimization in tryReassociatedAdd that pops the candidates that
+; do not dominate the current instruction.
+;
+; def1
+; cond1
+; / \
+; / \
+; cond2 use2
+; / \
+; / \
+; def2 def3
+; cond3
+; / \
+; / \
+; def4 use1
+;
+; NaryReassociate should match use1 with def3, and use2 with def1.
+define void @conditional2(i32 %a, i32 %b, i32 %c, i1 %cond1, i1 %cond2, i1 %cond3) {
+entry:
+ %def1 = add i32 %a, %b
+ br i1 %cond1, label %bb1, label %bb6
+bb1:
+ br i1 %cond2, label %bb2, label %bb3
+bb2:
+ %def2 = add i32 %a, %b
+ call void @foo(i32 %def2)
+ ret void
+bb3:
+ %def3 = add i32 %a, %b
+ br i1 %cond3, label %bb4, label %bb5
+bb4:
+ %def4 = add i32 %a, %b
+ call void @foo(i32 %def4)
+ ret void
+bb5:
+ %0 = add i32 %a, %c
+ %1 = add i32 %0, %b
+; CHECK: [[t1:%[0-9]+]] = add i32 %def3, %c
+ call void @foo(i32 %1) ; foo((a + c) + b);
+; CHECK-NEXT: call void @foo(i32 [[t1]])
+ ret void
+bb6:
+ %2 = add i32 %a, %c
+ %3 = add i32 %2, %b
+; CHECK: [[t2:%[0-9]+]] = add i32 %def1, %c
+ call void @foo(i32 %3) ; foo((a + c) + b);
+; CHECK-NEXT: call void @foo(i32 [[t2]])
+ ret void
+}
+
+; foo((a + b) + c)
+; foo(((a + d) + b) + c)
+; =>
+; t = (a + b) + c;
+; foo(t);
+; foo(t + d);
+define void @quaternary(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @quaternary(
+ %1 = add i32 %a, %b
+ %2 = add i32 %1, %c
+ call void @foo(i32 %2)
+; CHECK: call void @foo(i32 [[TMP1:%[a-zA-Z0-9]]])
+ %3 = add i32 %a, %d
+ %4 = add i32 %3, %b
+ %5 = add i32 %4, %c
+; CHECK: [[TMP2:%[a-zA-Z0-9]]] = add i32 [[TMP1]], %d
+ call void @foo(i32 %5)
+; CHECK: call void @foo(i32 [[TMP2]]
+ ret void
+}
+
+define void @iterative(i32 %a, i32 %b, i32 %c) {
+ %ab = add i32 %a, %b
+ %abc = add i32 %ab, %c
+ call void @foo(i32 %abc)
+
+ %ab2 = add i32 %ab, %b
+ %ab2c = add i32 %ab2, %c
+; CHECK: %ab2c = add i32 %abc, %b
+ call void @foo(i32 %ab2c)
+; CHECK-NEXT: call void @foo(i32 %ab2c)
+
+ %ab3 = add i32 %ab2, %b
+ %ab3c = add i32 %ab3, %c
+; CHECK-NEXT: %ab3c = add i32 %ab2c, %b
+ call void @foo(i32 %ab3c)
+; CHECK-NEXT: call void @foo(i32 %ab3c)
+
+ ret void
+}
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 2896fbb..c4f73e7 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -2684,8 +2684,8 @@ define {<2 x float>, <2 x float>} @"\01-[A z]"({}* %self, i8* nocapture %_cmd) n
invoke.cont:
%0 = bitcast {}* %self to i8*
%1 = tail call i8* @objc_retain(i8* %0) nounwind
- tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !2), metadata !MDExpression())
- tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !2), metadata !MDExpression())
+ tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !2), metadata !MDExpression()), !dbg !MDLocation(scope: !2)
+ tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !MDLocalVariable(tag: DW_TAG_auto_variable, scope: !2), metadata !MDExpression()), !dbg !MDLocation(scope: !2)
%ivar = load i64, i64* @"OBJC_IVAR_$_A.myZ", align 8
%add.ptr = getelementptr i8, i8* %0, i64 %ivar
%tmp1 = bitcast i8* %add.ptr to float*
@@ -2706,7 +2706,7 @@ invoke.cont:
%3 = bitcast i8* %arrayidx19 to float*
%tmp20 = load float, float* %3, align 4
%conv21 = fpext float %tmp20 to double
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str4, i64 0, i64 0), double %conv, double %conv8, double %conv14, double %conv21)
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str4, i64 0, i64 0), double %conv, double %conv8, double %conv14, double %conv21)
%ivar23 = load i64, i64* @"OBJC_IVAR_$_A.myZ", align 8
%add.ptr24 = getelementptr i8, i8* %0, i64 %ivar23
%4 = bitcast i8* %add.ptr24 to i128*
@@ -2758,7 +2758,7 @@ for.body: ; preds = %entry, %for.body
%i.010 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%1 = tail call i8* @objc_retain(i8* %x) nounwind
%tmp5 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
- %call = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %1, i8* %tmp5)
+ %call = tail call i8* (i8*, i8*, ...) @objc_msgSend(i8* %1, i8* %tmp5)
tail call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
%inc = add nsw i64 %i.010, 1
%exitcond = icmp eq i64 %inc, %n
@@ -2837,17 +2837,17 @@ entry:
%tmp2 = load %struct.__CFString*, %struct.__CFString** @kUTTypePlainText, align 8
%tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_19", align 8
%tmp4 = bitcast %struct._class_t* %tmp1 to i8*
- %call5 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp4, i8* %tmp3, %struct.__CFString* %tmp2)
+ %call5 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %tmp4, i8* %tmp3, %struct.__CFString* %tmp2)
%tmp5 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_21", align 8
%tmp6 = bitcast %3* %pboard to i8*
- %call76 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp6, i8* %tmp5, i8* %call5)
+ %call76 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %tmp6, i8* %tmp5, i8* %call5)
%tmp9 = call i8* @objc_retain(i8* %call76) nounwind
%tobool = icmp eq i8* %tmp9, null
br i1 %tobool, label %end, label %land.lhs.true
land.lhs.true: ; preds = %entry
%tmp11 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_23", align 8
- %call137 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp6, i8* %tmp11, i8* %tmp9)
+ %call137 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %tmp6, i8* %tmp11, i8* %tmp9)
%tmp = bitcast i8* %call137 to %1*
%tmp10 = call i8* @objc_retain(i8* %call137) nounwind
call void @objc_release(i8* null) nounwind
@@ -2866,7 +2866,7 @@ land.lhs.true23: ; preds = %if.then
%tmp24 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_26", align 8
%tmp26 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_28", align 8
%tmp27 = bitcast %struct._class_t* %tmp24 to i8*
- %call2822 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp27, i8* %tmp26, i8* %call137)
+ %call2822 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %tmp27, i8* %tmp26, i8* %call137)
%tmp13 = bitcast i8* %call2822 to %5*
%tmp14 = call i8* @objc_retain(i8* %call2822) nounwind
call void @objc_release(i8* null) nounwind
@@ -2877,9 +2877,9 @@ if.end: ; preds = %land.lhs.true23
%tmp32 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_29", align 8
%tmp33 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_31", align 8
%tmp34 = bitcast %struct._class_t* %tmp32 to i8*
- %call35 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp34, i8* %tmp33)
+ %call35 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %tmp34, i8* %tmp33)
%tmp37 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_33", align 8
- %call3923 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call35, i8* %tmp37, i8* %call2822, i32 signext 1, %4** %err)
+ %call3923 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %call35, i8* %tmp37, i8* %call2822, i32 signext 1, %4** %err)
%cmp = icmp eq i8* %call3923, null
br i1 %cmp, label %if.then44, label %end
@@ -2890,13 +2890,13 @@ if.then44: ; preds = %if.end, %land.lhs.t
%call513 = extractvalue %struct._NSRange %call51, 0
%call514 = extractvalue %struct._NSRange %call51, 1
%tmp52 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_37", align 8
- %call548 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call137, i8* %tmp52, i64 %call513, i64 %call514)
+ %call548 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %call137, i8* %tmp52, i64 %call513, i64 %call514)
%tmp55 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_38", align 8
%tmp56 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_40", align 8
%tmp57 = bitcast %struct._class_t* %tmp55 to i8*
- %call58 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp57, i8* %tmp56)
+ %call58 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %tmp57, i8* %tmp56)
%tmp59 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_42", align 8
- %call6110 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call548, i8* %tmp59, i8* %call58)
+ %call6110 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %call548, i8* %tmp59, i8* %call58)
%tmp15 = call i8* @objc_retain(i8* %call6110) nounwind
call void @objc_release(i8* %call137) nounwind
%tmp64 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_46", align 8
@@ -2906,7 +2906,7 @@ if.then44: ; preds = %if.end, %land.lhs.t
if.then68: ; preds = %if.then44
%tmp70 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_48", align 8
- %call7220 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call6110, i8* %tmp70)
+ %call7220 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %call6110, i8* %tmp70)
%tmp16 = call i8* @objc_retain(i8* %call7220) nounwind
call void @objc_release(i8* %call6110) nounwind
br label %if.end74
@@ -2916,7 +2916,7 @@ if.end74: ; preds = %if.then68, %if.then
%filename.0 = bitcast i8* %filename.0.in to %1*
%tmp17 = load i8*, i8** bitcast (%0* @"\01l_objc_msgSend_fixup_isEqual_" to i8**), align 16
%tmp18 = bitcast i8* %tmp17 to i8 (i8*, %struct._message_ref_t*, i8*, ...)*
- %call78 = call signext i8 (i8*, %struct._message_ref_t*, i8*, ...)* %tmp18(i8* %call137, %struct._message_ref_t* bitcast (%0* @"\01l_objc_msgSend_fixup_isEqual_" to %struct._message_ref_t*), i8* %filename.0.in)
+ %call78 = call signext i8 (i8*, %struct._message_ref_t*, i8*, ...) %tmp18(i8* %call137, %struct._message_ref_t* bitcast (%0* @"\01l_objc_msgSend_fixup_isEqual_" to %struct._message_ref_t*), i8* %filename.0.in)
%tobool79 = icmp eq i8 %call78, 0
br i1 %tobool79, label %land.lhs.true80, label %if.then109
@@ -2930,7 +2930,7 @@ if.end106: ; preds = %land.lhs.true80
%tmp88 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_26", align 8
%tmp90 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_28", align 8
%tmp91 = bitcast %struct._class_t* %tmp88 to i8*
- %call9218 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp91, i8* %tmp90, i8* %filename.0.in)
+ %call9218 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %tmp91, i8* %tmp90, i8* %filename.0.in)
%tmp20 = bitcast i8* %call9218 to %5*
%tmp21 = call i8* @objc_retain(i8* %call9218) nounwind
%tmp22 = bitcast %5* %url.025 to i8*
@@ -2938,9 +2938,9 @@ if.end106: ; preds = %land.lhs.true80
%tmp94 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_29", align 8
%tmp95 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_31", align 8
%tmp96 = bitcast %struct._class_t* %tmp94 to i8*
- %call97 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp96, i8* %tmp95)
+ %call97 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %tmp96, i8* %tmp95)
%tmp99 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_33", align 8
- %call10119 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call97, i8* %tmp99, i8* %call9218, i32 signext 1, %4** %err)
+ %call10119 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %call97, i8* %tmp99, i8* %call9218, i32 signext 1, %4** %err)
%phitmp = icmp eq i8* %call10119, null
br i1 %phitmp, label %if.then109, label %end
@@ -2958,10 +2958,10 @@ if.then112: ; preds = %if.then109
%tmp118 = load %1*, %1** @NSFilePathErrorKey, align 8
%tmp119 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_53", align 8
%tmp120 = bitcast %struct._class_t* %tmp115 to i8*
- %call12113 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp120, i8* %tmp119, %1* %call117, %1* %tmp118, i8* null)
+ %call12113 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %tmp120, i8* %tmp119, %1* %call117, %1* %tmp118, i8* null)
%tmp122 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_55", align 8
%tmp123 = bitcast %struct._class_t* %tmp113 to i8*
- %call12414 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp123, i8* %tmp122, %1* %tmp114, i64 258, i8* %call12113)
+ %call12414 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %tmp123, i8* %tmp122, %1* %tmp114, i64 258, i8* %call12113)
%tmp23 = call i8* @objc_retain(i8* %call12414) nounwind
%tmp25 = call i8* @objc_autorelease(i8* %tmp23) nounwind
%tmp28 = bitcast i8* %tmp25 to %4*
@@ -2973,9 +2973,9 @@ if.end125: ; preds = %if.then112, %if.the
%tmp126 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_56", align 8
%tmp128 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_58", align 8
%tmp129 = bitcast %struct._class_t* %tmp126 to i8*
- %call13015 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp129, i8* %tmp128, %4* %tmp127)
+ %call13015 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %tmp129, i8* %tmp128, %4* %tmp127)
%tmp131 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_60", align 8
- %call13317 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call13015, i8* %tmp131)
+ %call13317 = call i8* (i8*, i8*, ...) @objc_msgSend(i8* %call13015, i8* %tmp131)
br label %end
end: ; preds = %if.end125, %if.end106, %if.end, %land.lhs.true, %entry
diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll
index 2259e17..6ad46f2 100644
--- a/test/Transforms/ObjCARC/contract.ll
+++ b/test/Transforms/ObjCARC/contract.ll
@@ -169,7 +169,7 @@ return: ; preds = %if.then, %entry
; CHECK-NOT: clang.arc.use
; CHECK: }
define void @test9(i8* %a, i8* %b) {
- call void (...)* @clang.arc.use(i8* %a, i8* %b) nounwind
+ call void (...) @clang.arc.use(i8* %a, i8* %b) nounwind
ret void
}
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
index 215841c..25135a3 100644
--- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -62,11 +62,11 @@ lpad: ; preds = %entry
call void @objc_end_catch(), !dbg !49, !clang.arc.no_objc_arc_exceptions !38
; CHECK: call void @objc_release(i8* %call)
call void @objc_release(i8* %call) nounwind, !dbg !42, !clang.imprecise_release !38
- call void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*), i8* %call), !dbg !50, !clang.arc.no_objc_arc_exceptions !38
+ call void (i8*, ...) @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*), i8* %call), !dbg !50, !clang.arc.no_objc_arc_exceptions !38
br label %if.end, !dbg !52
if.end: ; preds = %lpad, %eh.cont
- call void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*), i8* %call), !dbg !53, !clang.arc.no_objc_arc_exceptions !38
+ call void (i8*, ...) @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*), i8* %call), !dbg !53, !clang.arc.no_objc_arc_exceptions !38
; CHECK: call void @objc_release(i8* %call)
call void @objc_release(i8* %call) nounwind, !dbg !54, !clang.imprecise_release !38
ret i32 0, !dbg !54
@@ -91,7 +91,7 @@ entry:
%tmp1 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1", align 8, !dbg !56
%tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_5", align 8, !dbg !56, !invariant.load !38
%tmp3 = bitcast %struct._class_t* %tmp1 to i8*, !dbg !56
- call void (i8*, i8*, %0*, %0*, ...)* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*, %0*, ...)*)(i8* %tmp3, i8* %tmp2, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*), %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*)), !dbg !56, !clang.arc.no_objc_arc_exceptions !38
+ call void (i8*, i8*, %0*, %0*, ...) bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*, %0*, ...)*)(i8* %tmp3, i8* %tmp2, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*), %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*)), !dbg !56, !clang.arc.no_objc_arc_exceptions !38
call void @objc_release(i8* %obj) nounwind, !dbg !58, !clang.imprecise_release !38
ret void, !dbg !58
}
@@ -114,14 +114,13 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!33, !34, !35, !36, !61}
!0 = !MDCompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.3 ", isOptimized: true, runtimeVersion: 2, emissionKind: 0, file: !60, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
-!1 = !{i32 0}
+!1 = !{}
!3 = !{!5, !27}
-!5 = !MDSubprogram(name: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 10, file: !60, scope: !6, type: !7, function: i32 ()* @main, variables: !10)
+!5 = !MDSubprogram(name: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 10, file: !60, scope: !6, type: !7, function: i32 ()* @main, variables: !11)
!6 = !MDFile(filename: "test.m", directory: "/Volumes/Files/gottesmmcab/Radar/12906997")
!7 = !MDSubroutineType(types: !8)
!8 = !{!9}
!9 = !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !{!11}
!11 = !{!12, !21, !25}
!12 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "obj", line: 11, scope: !13, file: !6, type: !14)
!13 = distinct !MDLexicalBlock(line: 10, column: 0, file: !60, scope: !5)
@@ -138,10 +137,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!24 = !MDBasicType(tag: DW_TAG_base_type, name: "signed char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!25 = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "obj2", line: 15, scope: !26, file: !6, type: !14)
!26 = distinct !MDLexicalBlock(line: 14, column: 0, file: !60, scope: !22)
-!27 = !MDSubprogram(name: "ThrowFunc", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !60, scope: !6, type: !28, function: void (i8*)* @ThrowFunc, variables: !30)
+!27 = !MDSubprogram(name: "ThrowFunc", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !60, scope: !6, type: !28, function: void (i8*)* @ThrowFunc, variables: !31)
!28 = !MDSubroutineType(types: !29)
!29 = !{null, !14}
-!30 = !{!31}
!31 = !{!32}
!32 = !MDLocalVariable(tag: DW_TAG_arg_variable, name: "obj", line: 4, arg: 1, scope: !27, file: !6, type: !14)
!33 = !{i32 1, !"Objective-C Version", i32 2}
diff --git a/test/Transforms/ObjCARC/intrinsic-use-isolated.ll b/test/Transforms/ObjCARC/intrinsic-use-isolated.ll
index f5c31fd..03d7520 100644
--- a/test/Transforms/ObjCARC/intrinsic-use-isolated.ll
+++ b/test/Transforms/ObjCARC/intrinsic-use-isolated.ll
@@ -10,7 +10,7 @@ declare void @clang.arc.use(...) nounwind
; CHECK-NOT: clang.arc.use
; CHECK: }
define void @test0(i8* %a, i8* %b) {
- call void (...)* @clang.arc.use(i8* %a, i8* %b) nounwind
+ call void (...) @clang.arc.use(i8* %a, i8* %b) nounwind
ret void
}
diff --git a/test/Transforms/ObjCARC/intrinsic-use.ll b/test/Transforms/ObjCARC/intrinsic-use.ll
index d85cb3e..f75b187 100644
--- a/test/Transforms/ObjCARC/intrinsic-use.ll
+++ b/test/Transforms/ObjCARC/intrinsic-use.ll
@@ -23,13 +23,13 @@ declare void @test0_helper(i8*, i8**)
; CHECK-NEXT: @objc_retain(i8* %y)
; CHECK-NEXT: call void @test0_helper
; CHECK-NEXT: [[VAL1:%.*]] = load i8*, i8** %temp0
-; CHECK-NEXT: call void (...)* @clang.arc.use(i8* %y)
+; CHECK-NEXT: call void (...) @clang.arc.use(i8* %y)
; CHECK-NEXT: @objc_retain(i8* [[VAL1]])
; CHECK-NEXT: @objc_release(i8* %y)
; CHECK-NEXT: store i8* [[VAL1]], i8** %temp1
; CHECK-NEXT: call void @test0_helper
; CHECK-NEXT: [[VAL2:%.*]] = load i8*, i8** %temp1
-; CHECK-NEXT: call void (...)* @clang.arc.use(i8* [[VAL1]])
+; CHECK-NEXT: call void (...) @clang.arc.use(i8* [[VAL1]])
; CHECK-NEXT: @objc_retain(i8* [[VAL2]])
; CHECK-NEXT: @objc_release(i8* [[VAL1]])
; CHECK-NEXT: @objc_autorelease(i8* %x)
@@ -49,13 +49,13 @@ entry:
call void @test0_helper(i8* %x, i8** %temp0)
%val1 = load i8*, i8** %temp0
%2 = call i8* @objc_retain(i8* %val1) nounwind
- call void (...)* @clang.arc.use(i8* %y) nounwind
+ call void (...) @clang.arc.use(i8* %y) nounwind
call void @objc_release(i8* %y) nounwind
store i8* %val1, i8** %temp1
call void @test0_helper(i8* %x, i8** %temp1)
%val2 = load i8*, i8** %temp1
%3 = call i8* @objc_retain(i8* %val2) nounwind
- call void (...)* @clang.arc.use(i8* %val1) nounwind
+ call void (...) @clang.arc.use(i8* %val1) nounwind
call void @objc_release(i8* %val1) nounwind
%4 = call i8* @objc_retain(i8* %x) nounwind
%5 = call i8* @objc_autorelease(i8* %x) nounwind
@@ -71,13 +71,13 @@ entry:
; CHECK-NEXT: @objc_retain(i8* %y)
; CHECK-NEXT: call void @test0_helper
; CHECK-NEXT: [[VAL1:%.*]] = load i8*, i8** %temp0
-; CHECK-NEXT: call void (...)* @clang.arc.use(i8* %y)
+; CHECK-NEXT: call void (...) @clang.arc.use(i8* %y)
; CHECK-NEXT: @objc_retain(i8* [[VAL1]])
; CHECK-NEXT: @objc_release(i8* %y)
; CHECK-NEXT: store i8* [[VAL1]], i8** %temp1
; CHECK-NEXT: call void @test0_helper
; CHECK-NEXT: [[VAL2:%.*]] = load i8*, i8** %temp1
-; CHECK-NEXT: call void (...)* @clang.arc.use(i8* [[VAL1]])
+; CHECK-NEXT: call void (...) @clang.arc.use(i8* [[VAL1]])
; CHECK-NEXT: @objc_retain(i8* [[VAL2]])
; CHECK-NEXT: @objc_release(i8* [[VAL1]])
; CHECK-NEXT: @objc_autorelease(i8* %x)
@@ -95,13 +95,13 @@ entry:
call void @test0_helper(i8* %x, i8** %temp0)
%val1 = load i8*, i8** %temp0
%2 = call i8* @objc_retain(i8* %val1) nounwind
- call void (...)* @clang.arc.use(i8* %y) nounwind
+ call void (...) @clang.arc.use(i8* %y) nounwind
call void @objc_release(i8* %y) nounwind, !clang.imprecise_release !0
store i8* %val1, i8** %temp1
call void @test0_helper(i8* %x, i8** %temp1)
%val2 = load i8*, i8** %temp1
%3 = call i8* @objc_retain(i8* %val2) nounwind
- call void (...)* @clang.arc.use(i8* %val1) nounwind
+ call void (...) @clang.arc.use(i8* %val1) nounwind
call void @objc_release(i8* %val1) nounwind, !clang.imprecise_release !0
%4 = call i8* @objc_retain(i8* %x) nounwind
%5 = call i8* @objc_autorelease(i8* %x) nounwind
diff --git a/test/Transforms/ObjCARC/move-and-merge-autorelease.ll b/test/Transforms/ObjCARC/move-and-merge-autorelease.ll
index 5d19f35..0a68541 100644
--- a/test/Transforms/ObjCARC/move-and-merge-autorelease.ll
+++ b/test/Transforms/ObjCARC/move-and-merge-autorelease.ll
@@ -99,9 +99,9 @@ bb81: ; preds = %bb, %bb76
%tmp10.1 = phi %0* [ %tmp10.0, %bb76 ], [ null, %bb ]
%tmp83 = bitcast %0* %tmp10.1 to i8*
%tmp84 = call i8* @objc_retain(i8* %tmp83) nounwind
- %tmp88 = bitcast i8* %tmp87 to %0*
call void @objc_release(i8* %tmp23) nounwind
%tmp87 = call i8* @objc_autorelease(i8* %tmp84) nounwind
+ %tmp88 = bitcast i8* %tmp87 to %0*
%tmp92 = bitcast %0* %tmp10.1 to i8*
call void @objc_release(i8* %tmp92) nounwind
ret %0* %tmp88
diff --git a/test/Transforms/PhaseOrdering/PR6627.ll b/test/Transforms/PhaseOrdering/PR6627.ll
index f214955..2774d20 100644
--- a/test/Transforms/PhaseOrdering/PR6627.ll
+++ b/test/Transforms/PhaseOrdering/PR6627.ll
@@ -36,7 +36,7 @@ land.lhs.true17: ; preds = %land.lhs.true9
br i1 %cmp23, label %if.then, label %if.end
if.then: ; preds = %land.lhs.true17
- %call25 = call i32 (...)* @doo()
+ %call25 = call i32 (...) @doo()
br label %if.end
if.end:
@@ -80,7 +80,7 @@ land.lhs.true17: ; preds = %land.lhs.true9
br i1 %cmp23, label %if.then, label %if.end
if.then: ; preds = %land.lhs.true17
- %call25 = call i32 (...)* @doo()
+ %call25 = call i32 (...) @doo()
br label %if.end
if.end:
diff --git a/test/Transforms/PlaceSafepoints/basic.ll b/test/Transforms/PlaceSafepoints/basic.ll
index ca63da4..4dbff58 100644
--- a/test/Transforms/PlaceSafepoints/basic.ll
+++ b/test/Transforms/PlaceSafepoints/basic.ll
@@ -76,7 +76,7 @@ define i1 @test_call_with_result() gc "statepoint-example" {
; CHECK: (i1 (i1)* @i1_return_i1, i32 1, i32 0, i1 false, i32 0)
; CHECK: %call12 = call i1 @llvm.experimental.gc.result.i1
entry:
- %call1 = tail call i1 (i1)* @i1_return_i1(i1 false)
+ %call1 = tail call i1 (i1) @i1_return_i1(i1 false)
ret i1 %call1
}
diff --git a/test/Transforms/Reassociate/looptest.ll b/test/Transforms/Reassociate/looptest.ll
index 8b6a409..5563070 100644
--- a/test/Transforms/Reassociate/looptest.ll
+++ b/test/Transforms/Reassociate/looptest.ll
@@ -34,7 +34,7 @@ bb4: ; preds = %bb4, %bb3
%reg113 = add i32 %reg115, %reg117 ; <i32> [#uses=1]
%reg114 = add i32 %reg113, %reg116 ; <i32> [#uses=1]
%cast227 = getelementptr [4 x i8], [4 x i8]* @.LC0, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %cast227, i32 %reg114 ) ; <i32>:0 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %cast227, i32 %reg114 ) ; <i32>:0 [#uses=0]
%reg118 = add i32 %reg117, 1 ; <i32> [#uses=2]
%cond224 = icmp ne i32 %reg118, %Num ; <i1> [#uses=1]
br i1 %cond224, label %bb4, label %bb5
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll
index d026d50..e6e5efc 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll
@@ -20,7 +20,7 @@ merge:
; CHECK-LABEL: merge:
; CHECK: %base_phi = phi i64 addrspace(1)* [ %base_obj_x, %here ], [ %base_obj_y, %there ]
%merged_value = phi i64 addrspace(1)* [ %x, %here ], [ %y, %there ]
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll
index 6f02056..fa800ea 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll
@@ -30,7 +30,7 @@ false:
merge:
%next = phi i64 addrspace(1)* [ %next_x, %true ], [ %next_y, %false ]
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll
index 1e8d071..c7fd32e 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll
@@ -19,7 +19,7 @@ loop: ; preds = %loop, %entry
; CHECK-DAG: [ %next.relocated, %loop ]
%current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
%next = getelementptr i64, i64 addrspace(1)* %current, i32 1
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll
index 94787bf..6509d23 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll
@@ -13,7 +13,7 @@ there:
merge:
%merged_value = phi i64 addrspace(1)* [ %base_obj, %entry ], [ %derived_obj, %there ]
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll
index 5aaa47e..7a80f31 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll
@@ -12,7 +12,7 @@ loop:
%current.i32 = bitcast i64 addrspace(1)* %current to i32 addrspace(1)*
%next.i32 = getelementptr i32, i32 addrspace(1)* %current.i32, i32 1
%next.i64 = bitcast i32 addrspace(1)* %next.i32 to i64 addrspace(1)*
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll
index 16bb3ac..33619ad 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll
@@ -13,9 +13,9 @@ entry:
loop:
; CHECK: loop:
-; CHECK: %safepoint_token1 = call i32 (i64 addrspace(1)* ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i3
+; CHECK: %safepoint_token1 = call i32 (i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i3
; CHECK-NEXT: %obj2 = call i64 addrspace(1)* @llvm.experimental.gc.result
- %safepoint_token1 = call i32 (i64 addrspace(1)* ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token1 = call i32 (i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
%obj2 = call i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32 %safepoint_token1)
switch i32 %condition, label %dest_a [
i32 0, label %dest_b
@@ -37,11 +37,11 @@ merge:
; CHECK: %obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
%obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
- %safepoint_token3 = call i32 (void (i64 addrspace(1)*)*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @consume_obj, i32 1, i32 0, i64 addrspace(1)* %obj_to_consume, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token3 = call i32 (void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @consume_obj, i32 1, i32 0, i64 addrspace(1)* %obj_to_consume, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %merge.split
merge.split: ; preds = %merge
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll
index 8390ecd..0800504 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll
@@ -23,7 +23,7 @@ merge:
; CHECK: %base_phi = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %base_obj_y, %there ]
; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
%merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll
index 1d60d03..f286ed9 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll
@@ -33,7 +33,7 @@ merge:
; CHECK: %base_phi = phi i64 addrspace(1)* [ %base_obj_x, %merge_here ], [ %base_obj_y, %there ]
; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
%merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll
index e4807e6..f51e117 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll
@@ -44,7 +44,7 @@ merge:
; CHECK: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
%merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll
index 488b61e..07bc42b 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll
@@ -24,7 +24,7 @@ check_for_null:
loop_back:
%next_element_ptr = getelementptr i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %current_element_ptr, i32 1
%next_index = add i32 %index, 1
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop_check
not_found:
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll
index c9fbcd6..973ac7e 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll
@@ -13,7 +13,7 @@ loop:
%condition = call i1 @runtime_value()
%maybe_next = getelementptr i64, i64 addrspace(1)* %current, i32 1
%next = select i1 %condition, i64 addrspace(1)* %maybe_next, i64 addrspace(1)* %current
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers.ll
index c5035eb..eab963f 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers.ll
@@ -16,7 +16,7 @@ loop:
; CHECK-DAG: [ %obj.relocated, %loop ]
; CHECK-DAG: [ %obj, %entry ]
call void @use_obj(i64 addrspace(1)* %obj)
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
@@ -57,7 +57,7 @@ define i64 addrspace(1)* @test1(i32 %caller, i8 addrspace(1)* %a, i8 addrspace(1
; CHECK: merge:
; CHECK-NEXT: %base_phi = phi i64 addrspace(1)* [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_R]], %right ], !is_base_value !0
%value = phi i64 addrspace(1)* [ %a.cast, %left], [ %a.cast, %left], [ %a.cast, %left], [ %b.cast, %right]
- %safepoint_token = call i32 (void (i64 addrspace(1)*)*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @parse_point, i32 1, i32 0, i64 addrspace(1)* %value, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @parse_point, i32 1, i32 0, i64 addrspace(1)* %value, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %value
}
@@ -76,7 +76,9 @@ loop: ; preds = %loop, %entry
; CHECK-LABEL: loop
; CHECK: %base_phi = phi i64 addrspace(1)*
; CHECK-DAG: [ %base_obj, %entry ]
-; CHECK-DAG: [ %base_select.relocated, %loop ]
+; Since the two selects are equivalent, so are their base phis - ideally,
+; we'd have commoned these, but that's a missed optimization, not correctness.
+; CHECK-DAG: [ [[DISCARD:%base_select.*.relocated]], %loop ]
; CHECK-NOT: base_phi2
; CHECK: next = select
; CHECK: base_select
@@ -89,7 +91,7 @@ loop: ; preds = %loop, %entry
%nexta = getelementptr i64, i64 addrspace(1)* %current, i32 1
%next = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
%extra2 = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
diff --git a/test/Transforms/RewriteStatepointsForGC/basics.ll b/test/Transforms/RewriteStatepointsForGC/basics.ll
index c1a1e4e..1720cc1 100644
--- a/test/Transforms/RewriteStatepointsForGC/basics.ll
+++ b/test/Transforms/RewriteStatepointsForGC/basics.ll
@@ -10,7 +10,7 @@ define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
entry:
- call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %obj
}
@@ -23,8 +23,8 @@ define i8 addrspace(1)* @test2(i8 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated1 = call coldcc i8 addrspace(1)*
entry:
- call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %obj
}
@@ -39,7 +39,7 @@ define i8 @test3(i8 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-NEXT: load i8, i8 addrspace(1)* %obj.relocated
entry:
%derived = getelementptr i8, i8 addrspace(1)* %obj, i64 10
- call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
%a = load i8, i8 addrspace(1)* %derived
%b = load i8, i8 addrspace(1)* %obj
@@ -57,14 +57,14 @@ taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
- call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %merge
untaken:
; CHECK-LABEL: untaken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated1 = call coldcc i8 addrspace(1)*
- call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %merge
merge:
@@ -75,13 +75,13 @@ merge:
}
; When run over a function which doesn't opt in, should do nothing!
-define i8 addrspace(1)* @test5(i8 addrspace(1)* %obj) {
+define i8 addrspace(1)* @test5(i8 addrspace(1)* %obj) gc "ocaml" {
; CHECK-LABEL: @test5
; CHECK-LABEL: entry:
; CHECK-NEXT: gc.statepoint
; CHECK-NOT: %obj.relocated = call coldcc i8 addrspace(1)*
entry:
- call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %obj
}
diff --git a/test/Transforms/RewriteStatepointsForGC/live-vector.ll b/test/Transforms/RewriteStatepointsForGC/live-vector.ll
new file mode 100644
index 0000000..c1a693e
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/live-vector.ll
@@ -0,0 +1,87 @@
+; Test that we can correctly handle vectors of pointers in statepoint
+; rewriting. Currently, we scalarize, but that's an implementation detail.
+; RUN: opt %s -rewrite-statepoints-for-gc -S | FileCheck %s
+
+; A non-vector relocation for comparison
+define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: test
+; CHECK: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated
+entry:
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %obj
+}
+
+; A base vector from an argument
+define <2 x i64 addrspace(1)*> @test2(<2 x i64 addrspace(1)*> %obj) gc "statepoint-example" {
+; CHECK-LABEL: test2
+; CHECK: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %5
+entry:
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0)
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+; A base vector from a load
+define <2 x i64 addrspace(1)*> @test3(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test3
+; CHECK: load
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %5
+entry:
+ %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0)
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+declare i32 @fake_personality_function()
+
+; When a statepoint is an invoke rather than a call
+define <2 x i64 addrspace(1)*> @test4(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test4
+; CHECK: load
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+entry:
+ %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ invoke i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0)
+ to label %normal_return unwind label %exceptional_return
+
+; CHECK-LABEL: normal_return:
+; CHECK: gc.relocate
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %6
+normal_return: ; preds = %entry
+ ret <2 x i64 addrspace(1)*> %obj
+
+; CHECK-LABEL: exceptional_return:
+; CHECK: gc.relocate
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %10
+exceptional_return: ; preds = %entry
+ %landing_pad4 = landingpad { i8*, i32 } personality i32 ()* @fake_personality_function
+ cleanup
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+declare void @do_safepoint()
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll b/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll
new file mode 100644
index 0000000..0990c68
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll
@@ -0,0 +1,158 @@
+; A collection of liveness test cases to ensure we're reporting the
+; correct live values at statepoints
+; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s
+
+
+; Tests to make sure we consider %obj live in both the taken and untaken
+; predecessors of merge.
+define i64 addrspace(1)* @test1(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test1
+entry:
+ br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i64 addrspace(1)*
+; CHECK-NEXT: br label %merge
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ br label %merge
+
+untaken:
+; CHECK-LABEL: untaken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated1 = call coldcc i64 addrspace(1)*
+; CHECK-NEXT: br label %merge
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ br label %merge
+
+merge:
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %.0 = phi i64 addrspace(1)* [ %obj.relocated, %taken ], [ %obj.relocated1, %untaken ]
+; CHECK-NEXT: ret i64 addrspace(1)* %.0
+ ret i64 addrspace(1)* %obj
+}
+
+; A local kill should not affect liveness in the predecessor block
+define i64 addrspace(1)* @test2(i1 %cmp, i64 addrspace(1)** %loc) gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: br
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: %obj = load
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated
+
+ %obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %obj
+
+untaken:
+ ret i64 addrspace(1)* null
+}
+
+; A local kill should affect values live from a successor phi. Also, we
+; should only propagate liveness from a phi to the appropriate predecessors.
+define i64 addrspace(1)* @test3(i1 %cmp, i64 addrspace(1)** %loc) gc "statepoint-example" {
+; CHECK-LABEL: @test3
+entry:
+ br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj = load
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i64 addrspace(1)*
+; CHECK-NEXT: br label %merge
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ %obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ br label %merge
+
+untaken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: br label %merge
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ br label %merge
+
+merge:
+ %phi = phi i64 addrspace(1)* [ %obj, %taken ], [ null, %untaken ]
+ ret i64 addrspace(1)* %phi
+}
+
+; A base pointer must be live if it is needed at a later statepoint,
+; even if the base pointer is otherwise unused.
+define i64 addrspace(1)* @test4(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test4
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %derived = getelementptr
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %derived.relocated =
+; CHECK-NEXT: %obj.relocated =
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %derived.relocated1 =
+; Note: It's legal to relocate obj again, but not strictly needed
+; CHECK-NEXT: %obj.relocated2 =
+; CHECK-NEXT: ret i64 addrspace(1)* %derived.relocated1
+;
+ %derived = getelementptr i64, i64 addrspace(1)* %obj, i64 8
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %derived
+}
+
+declare void @consume(...) readonly
+
+; Make sure that a phi def visited during iteration is considered a kill.
+; Also, liveness after base pointer analysis can change based on new uses,
+; not just new defs.
+define i64 addrspace(1)* @test5(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test5
+entry:
+ br i1 %cmp, label %taken, label %untaken
+
+taken:
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i64 addrspace(1)*
+; CHECK-NEXT: br label %merge
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ br label %merge
+
+untaken:
+; CHECK-LABEL: untaken:
+; CHECK-NEXT: br label %merge
+ br label %merge
+
+merge:
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %.0 = phi i64 addrspace(1)*
+; CHECK-NEXT: %obj2a = phi
+; CHECK-NEXT: @consume
+; CHECK-NEXT: br label %final
+ %obj2a = phi i64 addrspace(1)* [ %obj, %taken ], [null, %untaken]
+ call void (...) @consume(i64 addrspace(1)* %obj2a)
+ br label %final
+final:
+; CHECK-LABEL: final:
+; CHECK-NEXT: @consume
+; CHECK-NEXT: ret i64 addrspace(1)* %.0
+ call void (...) @consume(i64 addrspace(1)* %obj2a)
+ ret i64 addrspace(1)* %obj
+}
+
+declare void @foo()
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()*, i32, i32, ...)
+
diff --git a/test/Transforms/RewriteStatepointsForGC/preprocess.ll b/test/Transforms/RewriteStatepointsForGC/preprocess.ll
new file mode 100644
index 0000000..29a7dcc
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/preprocess.ll
@@ -0,0 +1,65 @@
+; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s
+
+declare void @consume(...)
+
+; Test to make sure we destroy LCSSA's single entry phi nodes before
+; running liveness
+define void @test6(i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test6
+entry:
+ br label %next
+
+next:
+; CHECK-LABEL: next:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated)
+; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated)
+ %obj2 = phi i64 addrspace(1)* [ %obj, %entry ]
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ call void (...) @consume(i64 addrspace(1)* %obj2)
+ call void (...) @consume(i64 addrspace(1)* %obj)
+ ret void
+}
+
+declare void @some_call(i64 addrspace(1)*)
+
+; Need to delete unreachable gc.statepoint call
+define void @test7() gc "statepoint-example" {
+; CHECK-LABEL: test7
+; CHECK-NOT: gc.statepoint
+ ret void
+
+unreached:
+ %obj = phi i64 addrspace(1)* [null, %unreached]
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ call void (...) @consume(i64 addrspace(1)* %obj)
+ br label %unreached
+}
+
+; Need to delete unreachable gc.statepoint invoke - tested separately since
+; a correct implementation could only remove the instructions, not the block
+define void @test8() gc "statepoint-example" {
+; CHECK-LABEL: test8
+; CHECK-NOT: gc.statepoint
+ ret void
+
+unreached:
+ invoke i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ to label %normal_return unwind label %exceptional_return
+
+normal_return: ; preds = %entry
+ ret void
+
+exceptional_return: ; preds = %entry
+ %landing_pad4 = landingpad { i8*, i32 } personality i32 ()* undef
+ cleanup
+ ret void
+}
+
+declare void @foo()
+; Bound the last check-not
+; CHECK-LABEL: @foo
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()*, i32, i32, ...)
+
diff --git a/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll b/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll
index b2dc2a1..73ebf2f 100644
--- a/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll
+++ b/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll
@@ -24,7 +24,7 @@ normal_dest:
;; CHECK-LABEL: normal_dest:
;; CHECK-NEXT: gc.statepoint
;; CHECK-NEXT: %obj.relocated = call coldcc i64* addrspace(1)*
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @gc_call, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @gc_call, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64* addrspace(1)* %obj
}
diff --git a/test/Transforms/RewriteStatepointsForGC/relocation.ll b/test/Transforms/RewriteStatepointsForGC/relocation.ll
index c3bda3d..da092ee 100644
--- a/test/Transforms/RewriteStatepointsForGC/relocation.ll
+++ b/test/Transforms/RewriteStatepointsForGC/relocation.ll
@@ -9,7 +9,7 @@ entry:
; CHECK-LABEL: @test1
; CHECK-DAG: %obj.relocated
; CHECK-DAG: %obj2.relocated
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
br label %joint
joint:
@@ -61,7 +61,7 @@ loop_x:
br label %loop.backedge
loop.backedge:
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0)
br label %loop
loop_y:
@@ -79,14 +79,14 @@ if_branch:
; CHECK-LABEL: if_branch:
; CHECK: gc.statepoint
; CHECK: gc.relocate
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
br label %join
else_branch:
; CHECK-LABEL: else_branch:
; CHECK: gc.statepoint
; CHECK: gc.relocate
- %safepoint_token1 = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
+ %safepoint_token1 = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 0)
br label %join
join:
@@ -96,7 +96,7 @@ join:
; CHECK-DAG: [ %arg.relocated, %if_branch ]
; CHECK-DAG: [ %arg.relocated4, %else_branch ]
; CHECK-NOT: phi
- call void (i8 addrspace(1)*)* @some_call(i8 addrspace(1)* %arg)
+ call void (i8 addrspace(1)*) @some_call(i8 addrspace(1)* %arg)
ret void
}
@@ -109,8 +109,8 @@ entry:
; CHECK: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: gc.statepoint
- %safepoint_token = call i32 (void (i64)*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidi64f(void (i64)* undef, i32 1, i32 0, i64 undef, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- %safepoint_token1 = call i32 (i32 (i64 addrspace(1)*)*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i32p1i64f(i32 (i64 addrspace(1)*)* undef, i32 1, i32 0, i64 addrspace(1)* %obj, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void (i64)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi64f(void (i64)* undef, i32 1, i32 0, i64 undef, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token1 = call i32 (i32 (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32p1i64f(i32 (i64 addrspace(1)*)* undef, i32 1, i32 0, i64 addrspace(1)* %obj, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret void
}
@@ -123,10 +123,10 @@ define void @test4() gc "statepoint-example" {
; CHECK: gc.statepoint
; CHECK: gc.relocate
; CHECK: @use(i8 addrspace(1)* %res.relocated)
- %safepoint_token2 = tail call i32 (i8 addrspace(1)* ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_p1i8f(i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0)
+ %safepoint_token2 = tail call i32 (i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0)
%res = call i8 addrspace(1)* @llvm.experimental.gc.result.ptr.p1i8(i32 %safepoint_token2)
- call i32 (i8 addrspace(1)* ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_p1i8f(i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0)
- call void (...)* @use(i8 addrspace(1)* %res)
+ call i32 (i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0)
+ call void (...) @use(i8 addrspace(1)* %res)
unreachable
}
@@ -135,7 +135,7 @@ define void @test4() gc "statepoint-example" {
define void @test5(i8 addrspace(1)* %arg) gc "statepoint-example" {
; CHECK-LABEL: test5
entry:
- call i32 (i8 addrspace(1)* ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_p1i8f(i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0)
+ call i32 (i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0)
switch i32 undef, label %kill [
i32 10, label %merge
i32 13, label %merge
@@ -151,7 +151,7 @@ merge:
; CHECK-DAG: [ %arg.relocated, %entry ]
; CHECK-DAG: [ %arg.relocated, %entry ]
%test = phi i8 addrspace(1)* [ null, %kill ], [ %arg, %entry ], [ %arg, %entry ]
- call void (...)* @use(i8 addrspace(1)* %test)
+ call void (...) @use(i8 addrspace(1)* %test)
unreachable
}
@@ -169,7 +169,7 @@ do_safepoint:
; CHECK: arg1.relocated =
; CHECK: arg2.relocated =
; CHECK: arg3.relocated =
- call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 3, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3)
+ call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 3, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3)
br label %gc.safepoint_poll.exit2
gc.safepoint_poll.exit2:
@@ -183,7 +183,7 @@ gc.safepoint_poll.exit2:
; CHECK: phi i8 addrspace(1)*
; CHECK-DAG: [ %arg1, %entry ]
; CHECK-DAG: [ %arg1.relocated, %do_safepoint ]
- call void (...)* @use(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3)
+ call void (...) @use(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3)
ret void
}
@@ -208,7 +208,7 @@ outer-inc:
; CHECK-LABEL: outer-inc:
; CHECK: %arg1.relocated
; CHECK: %arg2.relocated
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
br label %outer-loop
}
@@ -237,7 +237,7 @@ inner-loop:
; CHECK: gc.statepoint
; CHECK: %arg1.relocated
; CHECK: %arg2.relocated
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
br i1 %cmp, label %inner-loop, label %outer-inc
outer-inc:
@@ -257,7 +257,7 @@ branch2:
br i1 %condition, label %callbb, label %join2
callbb:
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %join
join:
diff --git a/test/Transforms/SCCP/2009-05-27-VectorOperandZero.ll b/test/Transforms/SCCP/2009-05-27-VectorOperandZero.ll
index 7aced66..1ac6bcd 100644
--- a/test/Transforms/SCCP/2009-05-27-VectorOperandZero.ll
+++ b/test/Transforms/SCCP/2009-05-27-VectorOperandZero.ll
@@ -3,7 +3,7 @@
define i32 @main() nounwind {
entry:
- %0 = tail call signext i8 (...)* @sin() nounwind
+ %0 = tail call signext i8 (...) @sin() nounwind
ret i32 0
}
diff --git a/test/Transforms/SCCP/retvalue-undef.ll b/test/Transforms/SCCP/retvalue-undef.ll
index 5a4ba11..f0e9e67 100644
--- a/test/Transforms/SCCP/retvalue-undef.ll
+++ b/test/Transforms/SCCP/retvalue-undef.ll
@@ -15,7 +15,7 @@ define internal i32 @g() {
; CHECK-NEXT: ret i32 8
define internal void @outer_mod() {
- %1 = call i32 ()* ()* @f() ; <i32 ()*> [#uses=1]
+ %1 = call i32 ()* () @f() ; <i32 ()*> [#uses=1]
%2 = call i32 %1() ; <i32> [#uses=0]
ret void
}
diff --git a/test/Transforms/SLPVectorizer/X86/barriercall.ll b/test/Transforms/SLPVectorizer/X86/barriercall.ll
index 9def190..382a43f 100644
--- a/test/Transforms/SLPVectorizer/X86/barriercall.ll
+++ b/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.8.0"
;CHECK: ret
define i32 @foo(i32* nocapture %A, i32 %n) {
entry:
- %call = tail call i32 (...)* @bar() #2
+ %call = tail call i32 (...) @bar() #2
%mul = mul nsw i32 %n, 5
%add = add nsw i32 %mul, 9
store i32 %add, i32* %A, align 4
diff --git a/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
index 3c457c4..be17c5d 100644
--- a/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
+++ b/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
@@ -36,7 +36,7 @@ for.body: ; preds = %for.inc, %entry
br i1 %cmp11, label %if.then, label %for.inc
if.then: ; preds = %for.body
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
br label %for.inc
for.inc: ; preds = %for.body, %if.then
diff --git a/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll b/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
index ea0064d..eac20a0 100644
--- a/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
+++ b/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
@@ -35,7 +35,7 @@ entry:
br i1 %tobool, label %if.end, label %if.then
if.then:
- %call = tail call i32 (...)* @bar()
+ %call = tail call i32 (...) @bar()
br label %if.end
if.end:
diff --git a/test/Transforms/SLPVectorizer/X86/debug_info.ll b/test/Transforms/SLPVectorizer/X86/debug_info.ll
index ccacbcc..60ff358 100644
--- a/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -59,7 +59,7 @@ attributes #1 = { nounwind readnone }
!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !MDFile(filename: "file.c", directory: "/Users/nadav")
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!4}
!4 = !MDSubprogram(name: "depth", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (double*, i32)* @depth, variables: !11)
!5 = !MDFile(filename: "file.c", directory: "/Users/nadav")
diff --git a/test/Transforms/SLPVectorizer/X86/in-tree-user.ll b/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
index a638548..b0ce074 100644
--- a/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
+++ b/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
@@ -35,7 +35,7 @@ for.body: ; preds = %for.inc, %entry
br i1 %cmp11, label %if.then, label %for.inc
if.then: ; preds = %for.body
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
br label %for.inc
for.inc: ; preds = %for.body, %if.then
diff --git a/test/Transforms/SLPVectorizer/X86/multi_block.ll b/test/Transforms/SLPVectorizer/X86/multi_block.ll
index 993054a..b381d06 100644
--- a/test/Transforms/SLPVectorizer/X86/multi_block.ll
+++ b/test/Transforms/SLPVectorizer/X86/multi_block.ll
@@ -34,7 +34,7 @@ define i32 @bar(double* nocapture %A, i32 %d) {
br i1 %6, label %9, label %7
; <label>:7 ; preds = %0
- %8 = tail call i32 (...)* @foo()
+ %8 = tail call i32 (...) @foo()
br label %9
; <label>:9 ; preds = %0, %7
diff --git a/test/Transforms/SLPVectorizer/X86/pr16628.ll b/test/Transforms/SLPVectorizer/X86/pr16628.ll
index c22ed34..06abe91 100644
--- a/test/Transforms/SLPVectorizer/X86/pr16628.ll
+++ b/test/Transforms/SLPVectorizer/X86/pr16628.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx10.9.0"
; Function Attrs: nounwind ssp uwtable
define void @f() {
entry:
- %call = tail call i32 (...)* @g()
+ %call = tail call i32 (...) @g()
%0 = load i32, i32* @c, align 4
%lnot = icmp eq i32 %0, 0
%lnot.ext = zext i1 %lnot to i32
diff --git a/test/Transforms/SROA/ppcf128-no-fold.ll b/test/Transforms/SROA/ppcf128-no-fold.ll
new file mode 100644
index 0000000..3f2934c
--- /dev/null
+++ b/test/Transforms/SROA/ppcf128-no-fold.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.ld2 = type { [2 x ppc_fp128] }
+declare void @bar(i8*, [2 x i128])
+
+define void @foo(i8* %v) #0 {
+entry:
+ %v.addr = alloca i8*, align 8
+ %z = alloca %struct.ld2, align 16
+ store i8* %v, i8** %v.addr, align 8
+ %dat = getelementptr inbounds %struct.ld2, %struct.ld2* %z, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [2 x ppc_fp128], [2 x ppc_fp128]* %dat, i32 0, i64 0
+ store ppc_fp128 0xM403B0000000000000000000000000000, ppc_fp128* %arrayidx, align 16
+ %dat1 = getelementptr inbounds %struct.ld2, %struct.ld2* %z, i32 0, i32 0
+ %arrayidx2 = getelementptr inbounds [2 x ppc_fp128], [2 x ppc_fp128]* %dat1, i32 0, i64 1
+ store ppc_fp128 0xM4093B400000000000000000000000000, ppc_fp128* %arrayidx2, align 16
+ %0 = load i8*, i8** %v.addr, align 8
+ %coerce.dive = getelementptr %struct.ld2, %struct.ld2* %z, i32 0, i32 0
+ %1 = bitcast [2 x ppc_fp128]* %coerce.dive to [2 x i128]*
+ %2 = load [2 x i128], [2 x i128]* %1, align 1
+ call void @bar(i8* %0, [2 x i128] %2)
+ ret void
+}
+
+; CHECK-LABEL: @foo
+; CHECK-NOT: i128 4628293042053316608
+; CHECK-NOT: i128 4653260752096854016
+; CHECK-DAG: i128 bitcast (ppc_fp128 0xM403B0000000000000000000000000000 to i128)
+; CHECK-DAG: i128 bitcast (ppc_fp128 0xM4093B400000000000000000000000000 to i128)
+; CHECK: call void @bar(i8* %v, [2 x i128]
+; CHECK: ret void
+
+attributes #0 = { nounwind }
+
diff --git a/test/Transforms/SampleProfile/branch.ll b/test/Transforms/SampleProfile/branch.ll
index a2d1fc3..ea41ee1 100644
--- a/test/Transforms/SampleProfile/branch.ll
+++ b/test/Transforms/SampleProfile/branch.ll
@@ -70,7 +70,7 @@ for.body: ; preds = %if.end, %for.body
if.end6: ; preds = %for.body, %if.end
%result.0 = phi double [ 0.000000e+00, %if.end ], [ %sub, %for.body ]
- %call7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i64 0, i64 0), double %result.0), !dbg !39
+ %call7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i64 0, i64 0), double %result.0), !dbg !39
br label %return, !dbg !40
; CHECK: edge if.end6 -> return probability is 16 / 16 = 100% [HOT edge]
@@ -100,7 +100,7 @@ attributes #4 = { nounwind readonly }
!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (trunk 192896) (llvm/trunk 192895)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !MDFile(filename: "branch.cc", directory: ".")
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!4}
!4 = !MDSubprogram(name: "main", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, function: i32 (i32, i8**)* @main, variables: !12)
!5 = !MDFile(filename: "branch.cc", directory: ".")
diff --git a/test/Transforms/SampleProfile/calls.ll b/test/Transforms/SampleProfile/calls.ll
index 7576dcb..a484995 100644
--- a/test/Transforms/SampleProfile/calls.ll
+++ b/test/Transforms/SampleProfile/calls.ll
@@ -83,7 +83,7 @@ if.end: ; preds = %if.else, %if.then
while.end: ; preds = %while.cond
%4 = load i32, i32* %s, align 4, !dbg !24
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24
ret i32 0, !dbg !25
}
diff --git a/test/Transforms/SampleProfile/fnptr.ll b/test/Transforms/SampleProfile/fnptr.ll
index 07c3c75..9b6653e 100644
--- a/test/Transforms/SampleProfile/fnptr.ll
+++ b/test/Transforms/SampleProfile/fnptr.ll
@@ -114,7 +114,7 @@ for.inc12: ; preds = %for.inc
for.end14: ; preds = %for.inc12
%S.2.lcssa.lcssa = phi double [ %S.2.lcssa, %for.inc12 ]
- %call15 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), double %S.2.lcssa.lcssa), !dbg !24
+ %call15 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), double %S.2.lcssa.lcssa), !dbg !24
ret i32 0, !dbg !25
}
diff --git a/test/Transforms/SampleProfile/propagate.ll b/test/Transforms/SampleProfile/propagate.ll
index fc9dedb..523122a 100644
--- a/test/Transforms/SampleProfile/propagate.ll
+++ b/test/Transforms/SampleProfile/propagate.ll
@@ -184,7 +184,7 @@ entry:
%4 = load i32, i32* %y, align 4, !dbg !41
%5 = load i64, i64* %N, align 8, !dbg !41
%call = call i64 @_Z3fooiil(i32 %3, i32 %4, i64 %5), !dbg !41
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str, i32 0, i32 0), i32 %0, i32 %1, i64 %2, i64 %call), !dbg !41
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str, i32 0, i32 0), i32 %0, i32 %1, i64 %2, i64 %call), !dbg !41
ret i32 0, !dbg !42
}
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
index 80ce25e..10bad7d 100644
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -61,5 +61,5 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!16 = !MDLocation(line: 5, column: 5, scope: !11)
!17 = !{!1}
!18 = !MDFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b")
-!19 = !{i32 0}
+!19 = !{}
!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/ScalarRepl/inline-vector.ll b/test/Transforms/ScalarRepl/inline-vector.ll
index 5d856c6..85f3741 100644
--- a/test/Transforms/ScalarRepl/inline-vector.ll
+++ b/test/Transforms/ScalarRepl/inline-vector.ll
@@ -45,7 +45,7 @@ for.end: ; preds = %for.cond
%x = getelementptr inbounds %struct.Vector4, %struct.Vector4* %vector, i32 0, i32 0
%tmp5 = load float, float* %x, align 16
%conv = fpext float %tmp5 to double
- %call = call i32 (...)* @printf(double %conv) nounwind
+ %call = call i32 (...) @printf(double %conv) nounwind
ret void
}
diff --git a/test/Transforms/ScalarRepl/only-memcpy-uses.ll b/test/Transforms/ScalarRepl/only-memcpy-uses.ll
index 935c289..d0ed20b 100644
--- a/test/Transforms/ScalarRepl/only-memcpy-uses.ll
+++ b/test/Transforms/ScalarRepl/only-memcpy-uses.ll
@@ -18,7 +18,7 @@ entry:
%tmp2 = bitcast %struct.S* %agg.tmp to i8*
%tmp3 = bitcast %struct.S* %t to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp3, i64 48, i32 4, i1 false)
- %call = call i32 (...)* @bazz(%struct.S* byval %agg.tmp)
+ %call = call i32 (...) @bazz(%struct.S* byval %agg.tmp)
ret void
}
diff --git a/test/Transforms/ScalarRepl/phi-cycle.ll b/test/Transforms/ScalarRepl/phi-cycle.ll
index 6089936..a44f081 100644
--- a/test/Transforms/ScalarRepl/phi-cycle.ll
+++ b/test/Transforms/ScalarRepl/phi-cycle.ll
@@ -67,10 +67,10 @@ while.cond.backedge.i: ; preds = %if.end.i, %while.bo
; CHECK: func.exit:
; CHECK-NOT: load
-; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %tmp) [[NUW:#[0-9]+]]
+; CHECK: %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %tmp) [[NUW:#[0-9]+]]
func.exit: ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge
%tmp3 = load i32, i32* %x.i, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
ret i32 0
}
diff --git a/test/Transforms/Scalarizer/dbginfo.ll b/test/Transforms/Scalarizer/dbginfo.ll
index 2bc9335..37a810f 100644
--- a/test/Transforms/Scalarizer/dbginfo.ll
+++ b/test/Transforms/Scalarizer/dbginfo.ll
@@ -59,7 +59,7 @@ attributes #1 = { nounwind readnone }
!0 = !MDCompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 194134) (llvm/trunk 194126)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !MDFile(filename: "/tmp/add.c", directory: "/home/richards/llvm/build")
-!2 = !{i32 0}
+!2 = !{}
!3 = !{!4}
!4 = !MDSubprogram(name: "f1", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, function: void (<4 x i32>*, <4 x i32>*, <4 x i32>*)* @f1, variables: !14)
!5 = !MDFile(filename: "/tmp/add.c", directory: "/home/richards/llvm/build")
diff --git a/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll b/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll
index 46df0f0..99041ed 100644
--- a/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll
+++ b/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll
@@ -13,7 +13,7 @@ bb5.outer: ; preds = %bb5.outer.loopexit, %entry
br label %bb5
bb5: ; preds = %bb5, %bb5.outer
- %tmp6 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1]
+ %tmp6 = tail call i32 (...) @foo( ) nounwind ; <i32> [#uses=1]
switch i32 %tmp6, label %bb13 [
i32 -1, label %bb10
i32 102, label %bb5
@@ -21,7 +21,7 @@ bb5: ; preds = %bb5, %bb5.outer
]
bb10: ; preds = %bb5
- %tmp12 = tail call i32 (...)* @bar( i32 %undo.0.ph ) nounwind ; <i32> [#uses=0]
+ %tmp12 = tail call i32 (...) @bar( i32 %undo.0.ph ) nounwind ; <i32> [#uses=0]
br label %UnifiedReturnBlock
bb13: ; preds = %bb5
diff --git a/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll b/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
index 2ef49bf..154677b 100644
--- a/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
+++ b/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
@@ -29,7 +29,7 @@ cowblock: ; preds = %beeblock, %monkeyblock
func_1.exit: ; preds = %cowblock, %entry
%outval = phi i32 [ %cowval, %cowblock ], [ 1, %entry ] ; <i32> [#uses=1]
- %pout = tail call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %outval ) nounwind ; <i32> [#uses=0]
+ %pout = tail call i32 (i8*, ...) @printf( i8* noalias getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %outval ) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll b/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll
index 7271024..4fc21d9 100644
--- a/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll
+++ b/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll
@@ -9,7 +9,7 @@ entry:
br i1 %cmp, label %ifthen, label %ifend
ifthen: ; preds = %entry
- %call = call i32 (...)* @foo() ; <i32> [#uses=0]
+ %call = call i32 (...) @foo() ; <i32> [#uses=0]
br label %ifend
ifend: ; preds = %ifthen, %entry
@@ -17,7 +17,7 @@ ifend: ; preds = %ifthen, %entry
br i1 %cmp2, label %ifthen3, label %ifend5
ifthen3: ; preds = %ifend
- %call4 = call i32 (...)* @foo() ; <i32> [#uses=0]
+ %call4 = call i32 (...) @foo() ; <i32> [#uses=0]
br label %ifend5
ifend5: ; preds = %ifthen3, %ifend
@@ -25,7 +25,7 @@ ifend5: ; preds = %ifthen3, %ifend
br i1 %cmp7, label %ifthen8, label %ifend10
ifthen8: ; preds = %ifend5
- %call9 = call i32 (...)* @bar() ; <i32> [#uses=0]
+ %call9 = call i32 (...) @bar() ; <i32> [#uses=0]
br label %ifend10
ifend10: ; preds = %ifthen8, %ifend5
@@ -33,7 +33,7 @@ ifend10: ; preds = %ifthen8, %ifend5
br i1 %cmp12, label %ifthen13, label %ifend15
ifthen13: ; preds = %ifend10
- %call14 = call i32 (...)* @bar() ; <i32> [#uses=0]
+ %call14 = call i32 (...) @bar() ; <i32> [#uses=0]
br label %ifend15
ifend15: ; preds = %ifthen13, %ifend10
diff --git a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
index 22b144b..8718c55 100644
--- a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
+++ b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
@@ -22,11 +22,15 @@ entry:
invoke void @test2( )
to label %N unwind label %U
U:
+ %res = landingpad { i8* } personality i32 (...)* @__gxx_personality_v0
+ cleanup
unreachable
N:
ret void
}
+declare i32 @__gxx_personality_v0(...)
+
define i32 @test3(i32 %v) {
; CHECK-LABEL: @test3(
; CHECK: entry:
diff --git a/test/Transforms/SimplifyCFG/common-dest-folding.ll b/test/Transforms/SimplifyCFG/common-dest-folding.ll
index 0aa3b2c..e3e27c7 100644
--- a/test/Transforms/SimplifyCFG/common-dest-folding.ll
+++ b/test/Transforms/SimplifyCFG/common-dest-folding.ll
@@ -18,7 +18,7 @@ define i32 @foo(i32 %k, i32 %c1, i32 %c2) {
br i1 %5, label %8, label %6
; <label>:6 ; preds = %3
- %7 = tail call i32 (...)* @bar() nounwind
+ %7 = tail call i32 (...) @bar() nounwind
br label %8
; <label>:8 ; preds = %3, %0, %6
@@ -47,7 +47,7 @@ bb3: ; preds = %bb
br i1 %tmp6, label %bb9, label %bb7
bb7: ; preds = %bb3
- %tmp8 = tail call i32 (...)* @bar() #1
+ %tmp8 = tail call i32 (...) @bar() #1
br label %bb9
bb9: ; preds = %bb7, %bb3, %bb
diff --git a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
index ac5ab60..ee7df26 100644
--- a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
+++ b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
@@ -4,17 +4,17 @@ define i32 @foo(i32 %i) nounwind ssp {
call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !6, metadata !MDExpression()), !dbg !7
call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !9, metadata !MDExpression()), !dbg !11
%1 = icmp ne i32 %i, 0, !dbg !12
-;CHECK: call i32 (...)* @bar()
+;CHECK: call i32 (...) @bar()
;CHECK-NEXT: llvm.dbg.value
br i1 %1, label %2, label %4, !dbg !12
; <label>:2 ; preds = %0
- %3 = call i32 (...)* @bar(), !dbg !13
+ %3 = call i32 (...) @bar(), !dbg !13
call void @llvm.dbg.value(metadata i32 %3, i64 0, metadata !9, metadata !MDExpression()), !dbg !13
br label %6, !dbg !15
; <label>:4 ; preds = %0
- %5 = call i32 (...)* @bar(), !dbg !16
+ %5 = call i32 (...) @bar(), !dbg !16
call void @llvm.dbg.value(metadata i32 %5, i64 0, metadata !9, metadata !MDExpression()), !dbg !16
br label %6, !dbg !18
diff --git a/test/Transforms/SimplifyCFG/trap-debugloc.ll b/test/Transforms/SimplifyCFG/trap-debugloc.ll
index a86649b..65c7e41 100644
--- a/test/Transforms/SimplifyCFG/trap-debugloc.ll
+++ b/test/Transforms/SimplifyCFG/trap-debugloc.ll
@@ -13,7 +13,7 @@ define void @foo() nounwind ssp {
!0 = !MDSubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !1, type: !3, function: void ()* @foo)
!1 = !MDFile(filename: "foo.c", directory: "/private/tmp")
-!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 0, file: !8, enums: !4, retainedTypes: !4, subprograms: !9)
+!2 = !MDCompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 0, file: !8, enums: !{}, retainedTypes: !{}, subprograms: !9)
!3 = !MDSubroutineType(types: !4)
!4 = !{null}
!5 = !MDLocation(line: 4, column: 2, scope: !6)
diff --git a/test/Transforms/SimplifyCFG/volatile-phioper.ll b/test/Transforms/SimplifyCFG/volatile-phioper.ll
index f2d4b8b..c366d05 100644
--- a/test/Transforms/SimplifyCFG/volatile-phioper.ll
+++ b/test/Transforms/SimplifyCFG/volatile-phioper.ll
@@ -17,12 +17,12 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define void @test(i8** nocapture %PeiServices) #0 {
entry:
- %call = tail call i32 (...)* @Trace() #2
+ %call = tail call i32 (...) @Trace() #2
%tobool = icmp eq i32 %call, 0
br i1 %tobool, label %while.body, label %if.then
if.then: ; preds = %entry
- %call1 = tail call i32 (...)* @Trace() #2
+ %call1 = tail call i32 (...) @Trace() #2
br label %while.body
while.body: ; preds = %entry, %if.then, %while.body
diff --git a/test/Transforms/StraightLineStrengthReduce/X86/no-slsr.ll b/test/Transforms/StraightLineStrengthReduce/X86/no-slsr.ll
index 94c47c7..e2201ce 100644
--- a/test/Transforms/StraightLineStrengthReduce/X86/no-slsr.ll
+++ b/test/Transforms/StraightLineStrengthReduce/X86/no-slsr.ll
@@ -5,8 +5,8 @@ target triple = "x86_64-unknown-linux-gnu"
; Do not perform SLSR on &input[s] and &input[s * 2] which fit into addressing
; modes of X86.
-define i32 @slsr_gep(i32* %input, i64 %s) {
-; CHECK-LABEL: @slsr_gep(
+define i32 @no_slsr_gep(i32* %input, i64 %s) {
+; CHECK-LABEL: @no_slsr_gep(
; v0 = input[0];
%p0 = getelementptr inbounds i32, i32* %input, i64 0
%v0 = load i32, i32* %p0
@@ -28,3 +28,17 @@ define i32 @slsr_gep(i32* %input, i64 %s) {
ret i32 %2
}
+define void @no_slsr_add(i32 %b, i32 %s) {
+; CHECK-LABEL: @no_slsr_add(
+ %1 = add i32 %b, %s
+; CHECK: add i32 %b, %s
+ call void @foo(i32 %1)
+ %s2 = mul i32 %s, 2
+; CHECK: %s2 = mul i32 %s, 2
+ %2 = add i32 %b, %s2
+; CHECK: add i32 %b, %s2
+ call void @foo(i32 %2)
+ ret void
+}
+
+declare void @foo(i32 %a)
diff --git a/test/Transforms/StraightLineStrengthReduce/slsr-add.ll b/test/Transforms/StraightLineStrengthReduce/slsr-add.ll
new file mode 100644
index 0000000..4c79be0
--- /dev/null
+++ b/test/Transforms/StraightLineStrengthReduce/slsr-add.ll
@@ -0,0 +1,101 @@
+; RUN: opt < %s -slsr -gvn -dce -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+
+define void @shl(i32 %b, i32 %s) {
+; CHECK-LABEL: @shl(
+ %1 = add i32 %b, %s
+; [[BASIS:%[a-zA-Z0-9]+]] = add i32 %b, %s
+ call void @foo(i32 %1)
+ %s2 = shl i32 %s, 1
+ %2 = add i32 %b, %s2
+; add i32 [[BASIS]], %s
+ call void @foo(i32 %2)
+ ret void
+}
+
+define void @stride_is_2s(i32 %b, i32 %s) {
+; CHECK-LABEL: @stride_is_2s(
+ %s2 = shl i32 %s, 1
+; CHECK: %s2 = shl i32 %s, 1
+ %1 = add i32 %b, %s2
+; CHECK: [[t1:%[a-zA-Z0-9]+]] = add i32 %b, %s2
+ call void @foo(i32 %1)
+ %s4 = shl i32 %s, 2
+ %2 = add i32 %b, %s4
+; CHECK: [[t2:%[a-zA-Z0-9]+]] = add i32 [[t1]], %s2
+ call void @foo(i32 %2)
+ %s6 = mul i32 %s, 6
+ %3 = add i32 %b, %s6
+; CHECK: add i32 [[t2]], %s2
+ call void @foo(i32 %3)
+ ret void
+}
+
+define void @stride_is_3s(i32 %b, i32 %s) {
+; CHECK-LABEL: @stride_is_3s(
+ %1 = add i32 %s, %b
+; CHECK: [[t1:%[a-zA-Z0-9]+]] = add i32 %s, %b
+ call void @foo(i32 %1)
+ %s4 = shl i32 %s, 2
+ %2 = add i32 %s4, %b
+; CHECK: [[bump:%[a-zA-Z0-9]+]] = mul i32 %s, 3
+; CHECK: [[t2:%[a-zA-Z0-9]+]] = add i32 [[t1]], [[bump]]
+ call void @foo(i32 %2)
+ %s7 = mul i32 %s, 7
+ %3 = add i32 %s7, %b
+; CHECK: add i32 [[t2]], [[bump]]
+ call void @foo(i32 %3)
+ ret void
+}
+
+; foo(b + 6 * s);
+; foo(b + 4 * s);
+; foo(b + 2 * s);
+; =>
+; t1 = b + 6 * s;
+; foo(t1);
+; s2 = 2 * s;
+; t2 = t1 - s2;
+; foo(t2);
+; t3 = t2 - s2;
+; foo(t3);
+define void @stride_is_minus_2s(i32 %b, i32 %s) {
+; CHECK-LABEL: @stride_is_minus_2s(
+ %s6 = mul i32 %s, 6
+ %1 = add i32 %b, %s6
+; CHECK: [[t1:%[a-zA-Z0-9]+]] = add i32 %b, %s6
+; CHECK: call void @foo(i32 [[t1]])
+ call void @foo(i32 %1)
+ %s4 = shl i32 %s, 2
+ %2 = add i32 %b, %s4
+; CHECK: [[bump:%[a-zA-Z0-9]+]] = shl i32 %s, 1
+; CHECK: [[t2:%[a-zA-Z0-9]+]] = sub i32 [[t1]], [[bump]]
+ call void @foo(i32 %2)
+; CHECK: call void @foo(i32 [[t2]])
+ %s2 = shl i32 %s, 1
+ %3 = add i32 %b, %s2
+; CHECK: [[t3:%[a-zA-Z0-9]+]] = sub i32 [[t2]], [[bump]]
+ call void @foo(i32 %3)
+; CHECK: call void @foo(i32 [[t3]])
+ ret void
+}
+
+; t = b + (s << 3);
+; foo(t);
+; foo(b + s);
+;
+; do not rewrite b + s to t - 7 * s because the latter is more complicated.
+define void @simple_enough(i32 %b, i32 %s) {
+; CHECK-LABEL: @simple_enough(
+ %s8 = shl i32 %s, 3
+ %1 = add i32 %b, %s8
+ call void @foo(i32 %1)
+ %2 = add i32 %b, %s
+; CHECK: [[t:%[a-zA-Z0-9]+]] = add i32 %b, %s{{$}}
+ call void @foo(i32 %2)
+; CHECK: call void @foo(i32 [[t]])
+ ret void
+}
+
+declare void @foo(i32)
diff --git a/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll b/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll
index 47e6637..3944739 100644
--- a/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll
+++ b/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll
@@ -2,77 +2,108 @@
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
-define i32 @slsr_gep(i32* %input, i64 %s) {
+; foo(input[0]);
+; foo(input[s]);
+; foo(input[s * 2]);
+; =>
+; p0 = &input[0];
+; foo(*p);
+; p1 = p0 + s;
+; foo(*p1);
+; p2 = p1 + s;
+; foo(*p2);
+define void @slsr_gep(i32* %input, i64 %s) {
; CHECK-LABEL: @slsr_gep(
; v0 = input[0];
%p0 = getelementptr inbounds i32, i32* %input, i64 0
%v0 = load i32, i32* %p0
+ call void @foo(i32 %v0)
; v1 = input[s];
%p1 = getelementptr inbounds i32, i32* %input, i64 %s
; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %s
%v1 = load i32, i32* %p1
+ call void @foo(i32 %v1)
; v2 = input[s * 2];
- %s2 = mul nsw i64 %s, 2
+ %s2 = shl nsw i64 %s, 1
%p2 = getelementptr inbounds i32, i32* %input, i64 %s2
; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 %s
%v2 = load i32, i32* %p2
+ call void @foo(i32 %v2)
- ; return v0 + v1 + v2;
- %1 = add i32 %v0, %v1
- %2 = add i32 %1, %v2
- ret i32 %2
+ ret void
}
-define i32 @slsr_gep_sext(i32* %input, i32 %s) {
+; foo(input[0]);
+; foo(input[(long)s]);
+; foo(input[(long)(s * 2)]);
+; =>
+; p0 = &input[0];
+; foo(*p);
+; p1 = p0 + (long)s;
+; foo(*p1);
+; p2 = p1 + (long)s;
+; foo(*p2);
+define void @slsr_gep_sext(i32* %input, i32 %s) {
; CHECK-LABEL: @slsr_gep_sext(
; v0 = input[0];
%p0 = getelementptr inbounds i32, i32* %input, i64 0
%v0 = load i32, i32* %p0
+ call void @foo(i32 %v0)
- ; v1 = input[(long)s];
+ ; v1 = input[s];
%t = sext i32 %s to i64
%p1 = getelementptr inbounds i32, i32* %input, i64 %t
; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %t
%v1 = load i32, i32* %p1
+ call void @foo(i32 %v1)
- ; v2 = input[(long)(s * 2)];
- %s2 = mul nsw i32 %s, 2
+ ; v2 = input[s * 2];
+ %s2 = shl nsw i32 %s, 1
%t2 = sext i32 %s2 to i64
%p2 = getelementptr inbounds i32, i32* %input, i64 %t2
; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 %t
%v2 = load i32, i32* %p2
+ call void @foo(i32 %v2)
- ; return v0 + v1 + v2;
- %1 = add i32 %v0, %v1
- %2 = add i32 %1, %v2
- ret i32 %2
+ ret void
}
-define i32 @slsr_gep_2d([10 x [5 x i32]]* %input, i64 %s, i64 %t) {
+; int input[10][5];
+; foo(input[s][t]);
+; foo(input[s * 2][t]);
+; foo(input[s * 3][t]);
+; =>
+; p0 = &input[s][t];
+; foo(*p0);
+; p1 = p0 + 5s;
+; foo(*p1);
+; p2 = p1 + 5s;
+; foo(*p2);
+define void @slsr_gep_2d([10 x [5 x i32]]* %input, i64 %s, i64 %t) {
; CHECK-LABEL: @slsr_gep_2d(
; v0 = input[s][t];
%p0 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s, i64 %t
%v0 = load i32, i32* %p0
+ call void @foo(i32 %v0)
; v1 = input[s * 2][t];
- %s2 = mul nsw i64 %s, 2
+ %s2 = shl nsw i64 %s, 1
; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = mul i64 %s, 5
%p1 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s2, i64 %t
; CHECK: %p1 = getelementptr inbounds i32, i32* %p0, i64 [[BUMP]]
%v1 = load i32, i32* %p1
+ call void @foo(i32 %v1)
- ; v2 = input[s * 3][t];
+ ; v3 = input[s * 3][t];
%s3 = mul nsw i64 %s, 3
%p2 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s3, i64 %t
; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 [[BUMP]]
%v2 = load i32, i32* %p2
+ call void @foo(i32 %v2)
- ; return v0 + v1 + v2;
- %1 = add i32 %v0, %v1
- %2 = add i32 %1, %v2
- ret i32 %2
+ ret void
}
%struct.S = type <{ i64, i32 }>
@@ -83,27 +114,55 @@ define i32 @slsr_gep_2d([10 x [5 x i32]]* %input, i64 %s, i64 %t) {
; which may not be divisible by typeof(input[s][t].f1) = 8. Therefore, we
; rewrite the candidates using byte offset instead of index offset as in
; @slsr_gep_2d.
-define i64 @slsr_gep_uglygep([10 x [5 x %struct.S]]* %input, i64 %s, i64 %t) {
+define void @slsr_gep_uglygep([10 x [5 x %struct.S]]* %input, i64 %s, i64 %t) {
; CHECK-LABEL: @slsr_gep_uglygep(
; v0 = input[s][t].f1;
%p0 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s, i64 %t, i32 0
%v0 = load i64, i64* %p0
+ call void @bar(i64 %v0)
; v1 = input[s * 2][t].f1;
- %s2 = mul nsw i64 %s, 2
+ %s2 = shl nsw i64 %s, 1
; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = mul i64 %s, 60
%p1 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s2, i64 %t, i32 0
; CHECK: getelementptr inbounds i8, i8* %{{[0-9]+}}, i64 [[BUMP]]
%v1 = load i64, i64* %p1
+ call void @bar(i64 %v1)
; v2 = input[s * 3][t].f1;
%s3 = mul nsw i64 %s, 3
%p2 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s3, i64 %t, i32 0
; CHECK: getelementptr inbounds i8, i8* %{{[0-9]+}}, i64 [[BUMP]]
%v2 = load i64, i64* %p2
+ call void @bar(i64 %v2)
+
+ ret void
+}
+
+define void @slsr_out_of_bounds_gep(i32* %input, i32 %s) {
+; CHECK-LABEL: @slsr_out_of_bounds_gep(
+ ; v0 = input[0];
+ %p0 = getelementptr i32, i32* %input, i64 0
+ %v0 = load i32, i32* %p0
+ call void @foo(i32 %v0)
- ; return v0 + v1 + v2;
- %1 = add i64 %v0, %v1
- %2 = add i64 %1, %v2
- ret i64 %2
+ ; v1 = input[(long)s];
+ %t = sext i32 %s to i64
+ %p1 = getelementptr i32, i32* %input, i64 %t
+; CHECK: %p1 = getelementptr i32, i32* %input, i64 %t
+ %v1 = load i32, i32* %p1
+ call void @foo(i32 %v1)
+
+ ; v2 = input[(long)(s * 2)];
+ %s2 = shl nsw i32 %s, 1
+ %t2 = sext i32 %s2 to i64
+ %p2 = getelementptr i32, i32* %input, i64 %t2
+; CHECK: %p2 = getelementptr i32, i32* %p1, i64 %t
+ %v2 = load i32, i32* %p2
+ call void @foo(i32 %v2)
+
+ ret void
}
+
+declare void @foo(i32)
+declare void @bar(i64)
diff --git a/test/Transforms/StraightLineStrengthReduce/slsr-mul.ll b/test/Transforms/StraightLineStrengthReduce/slsr-mul.ll
index 0a7e472..1c7333d 100644
--- a/test/Transforms/StraightLineStrengthReduce/slsr-mul.ll
+++ b/test/Transforms/StraightLineStrengthReduce/slsr-mul.ll
@@ -2,37 +2,32 @@
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
-declare i32 @foo(i32 %a)
-
-define i32 @slsr1(i32 %b, i32 %s) {
+define void @slsr1(i32 %b, i32 %s) {
; CHECK-LABEL: @slsr1(
- ; v0 = foo(b * s);
+ ; foo(b * s);
%mul0 = mul i32 %b, %s
; CHECK: mul i32
; CHECK-NOT: mul i32
- %v0 = call i32 @foo(i32 %mul0)
+ call void @foo(i32 %mul0)
- ; v1 = foo((b + 1) * s);
+ ; foo((b + 1) * s);
%b1 = add i32 %b, 1
%mul1 = mul i32 %b1, %s
- %v1 = call i32 @foo(i32 %mul1)
+ call void @foo(i32 %mul1)
- ; v2 = foo((b + 2) * s);
+ ; foo((b + 2) * s);
%b2 = add i32 %b, 2
%mul2 = mul i32 %b2, %s
- %v2 = call i32 @foo(i32 %mul2)
+ call void @foo(i32 %mul2)
- ; return v0 + v1 + v2;
- %1 = add i32 %v0, %v1
- %2 = add i32 %1, %v2
- ret i32 %2
+ ret void
}
-; v0 = foo(a * b)
-; v1 = foo((a + 1) * b)
-; v2 = foo(a * (b + 1))
-; v3 = foo((a + 1) * (b + 1))
-define i32 @slsr2(i32 %a, i32 %b) {
+; foo(a * b)
+; foo((a + 1) * b)
+; foo(a * (b + 1))
+; foo((a + 1) * (b + 1))
+define void @slsr2(i32 %a, i32 %b) {
; CHECK-LABEL: @slsr2(
%a1 = add i32 %a, 1
%b1 = add i32 %b, 1
@@ -43,63 +38,50 @@ define i32 @slsr2(i32 %a, i32 %b) {
%mul2 = mul i32 %a, %b1
%mul3 = mul i32 %a1, %b1
- %v0 = call i32 @foo(i32 %mul0)
- %v1 = call i32 @foo(i32 %mul1)
- %v2 = call i32 @foo(i32 %mul2)
- %v3 = call i32 @foo(i32 %mul3)
+ call void @foo(i32 %mul0)
+ call void @foo(i32 %mul1)
+ call void @foo(i32 %mul2)
+ call void @foo(i32 %mul3)
- %1 = add i32 %v0, %v1
- %2 = add i32 %1, %v2
- %3 = add i32 %2, %v3
- ret i32 %3
+ ret void
}
; The bump is a multiple of the stride.
;
-; v0 = foo(b * s);
-; v1 = foo((b + 2) * s);
-; v2 = foo((b + 4) * s);
-; return v0 + v1 + v2;
-;
-; ==>
-;
+; foo(b * s);
+; foo((b + 2) * s);
+; foo((b + 4) * s);
+; =>
; mul0 = b * s;
-; v0 = foo(mul0);
; bump = s * 2;
; mul1 = mul0 + bump; // GVN ensures mul1 and mul2 use the same bump.
-; v1 = foo(mul1);
; mul2 = mul1 + bump;
-; v2 = foo(mul2);
-; return v0 + v1 + v2;
-define i32 @slsr3(i32 %b, i32 %s) {
+define void @slsr3(i32 %b, i32 %s) {
; CHECK-LABEL: @slsr3(
%mul0 = mul i32 %b, %s
; CHECK: mul i32
- %v0 = call i32 @foo(i32 %mul0)
+ call void @foo(i32 %mul0)
%b1 = add i32 %b, 2
%mul1 = mul i32 %b1, %s
-; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = mul i32 %s, 2
+; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = shl i32 %s, 1
; CHECK: %mul1 = add i32 %mul0, [[BUMP]]
- %v1 = call i32 @foo(i32 %mul1)
+ call void @foo(i32 %mul1)
%b2 = add i32 %b, 4
%mul2 = mul i32 %b2, %s
; CHECK: %mul2 = add i32 %mul1, [[BUMP]]
- %v2 = call i32 @foo(i32 %mul2)
+ call void @foo(i32 %mul2)
- %1 = add i32 %v0, %v1
- %2 = add i32 %1, %v2
- ret i32 %2
+ ret void
}
; Do not rewrite a candidate if its potential basis does not dominate it.
-; v0 = 0;
+;
; if (cond)
-; v0 = foo(a * b);
-; v1 = foo((a + 1) * b);
-; return v0 + v1;
-define i32 @not_dominate(i1 %cond, i32 %a, i32 %b) {
+; foo(a * b);
+; foo((a + 1) * b);
+define void @not_dominate(i1 %cond, i32 %a, i32 %b) {
; CHECK-LABEL: @not_dominate(
entry:
%a1 = add i32 %a, 1
@@ -108,14 +90,14 @@ entry:
then:
%mul0 = mul i32 %a, %b
; CHECK: %mul0 = mul i32 %a, %b
- %v0 = call i32 @foo(i32 %mul0)
+ call void @foo(i32 %mul0)
br label %merge
merge:
- %v0.phi = phi i32 [ 0, %entry ], [ %mul0, %then ]
%mul1 = mul i32 %a1, %b
; CHECK: %mul1 = mul i32 %a1, %b
- %v1 = call i32 @foo(i32 %mul1)
- %sum = add i32 %v0.phi, %v1
- ret i32 %sum
+ call void @foo(i32 %mul1)
+ ret void
}
+
+declare void @foo(i32)
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
index 457dfd1..7fd3bcc 100644
--- a/test/Transforms/StripSymbols/2010-08-25-crash.ll
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -18,7 +18,7 @@ entry:
!8 = !MDLocation(line: 3, column: 13, scope: !9)
!9 = distinct !MDLexicalBlock(line: 3, column: 11, file: !10, scope: !0)
!10 = !MDFile(filename: "/tmp/a.c", directory: "/Volumes/Lalgate/clean/D.CW")
-!11 = !{i32 0}
+!11 = !{}
!12 = !{!0}
!13 = !{!6}
!14 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/StripSymbols/strip-dead-debug-info.ll b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
index febb944..d9511b8 100644
--- a/test/Transforms/StripSymbols/strip-dead-debug-info.ll
+++ b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
@@ -30,7 +30,7 @@ attributes #2 = { nounwind readonly ssp }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!25}
-!0 = !MDCompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !23, globals: !24)
+!0 = !MDCompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !1, enums: !{}, retainedTypes: !{}, subprograms: !23, globals: !24)
!1 = !MDFile(filename: "g.c", directory: "/tmp/")
!2 = !{null}
!3 = !MDSubprogram(name: "bar", line: 5, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !4)
diff --git a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
index 2921860..4f7a3ca 100644
--- a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
+++ b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
@@ -11,7 +11,7 @@ bb: ; preds = %entry
br label %bb9
bb5: ; preds = %bb9
- %tmp7 = call i32 (...)* @bar( i32 %x.0 ) nounwind ; <i32> [#uses=1]
+ %tmp7 = call i32 (...) @bar( i32 %x.0 ) nounwind ; <i32> [#uses=1]
br label %bb9
bb9: ; preds = %bb5, %bb, %entry
diff --git a/test/Verifier/2008-01-11-VarargAttrs.ll b/test/Verifier/2008-01-11-VarargAttrs.ll
index b6ce625..af97ce6 100644
--- a/test/Verifier/2008-01-11-VarargAttrs.ll
+++ b/test/Verifier/2008-01-11-VarargAttrs.ll
@@ -5,6 +5,6 @@
declare void @foo(...)
define void @bar() {
- call void (...)* @foo(%struct* sret null )
+ call void (...) @foo(%struct* sret null )
ret void
}
diff --git a/test/Verifier/dbg-typerefs.ll b/test/Verifier/dbg-typerefs.ll
new file mode 100644
index 0000000..55ee03e
--- /dev/null
+++ b/test/Verifier/dbg-typerefs.ll
@@ -0,0 +1,32 @@
+; RUN: not llvm-as -disable-output <%s 2>&1 | FileCheck %s
+; Check that the debug info verifier gives nice errors for bad type refs
+; (rather than crashing).
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+
+; Make a bunch of type references. Note that !4 references !"0.bad" (instead
+; of !"4.bad") to test error ordering.
+!typerefs = !{!1, !2, !3, !4}
+!1 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !"1.good")
+!2 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !"2.bad")
+!3 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !"3.good")
+!4 = !MDDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !"0.bad")
+
+; Add a minimal compile unit to resolve some of the type references.
+!llvm.dbg.cu = !{!5}
+!5 = !MDCompileUnit(file: !6, language: DW_LANG_C99, retainedTypes: !7)
+!6 = !MDFile(filename: "file.c", directory: "/path/to/dir")
+!7 = !{!8, !9}
+!8 = !MDCompositeType(tag: DW_TAG_structure_type, identifier: "1.good")
+!9 = !MDCompositeType(tag: DW_TAG_structure_type, identifier: "3.good")
+
+; CHECK: assembly parsed, but does not verify
+; CHECK-NEXT: unresolved type ref
+; CHECK-NEXT: !"0.bad"
+; CHECK-NEXT: !MDDerivedType(tag: DW_TAG_pointer_type
+; CHECK-SAME: baseType: !"0.bad"
+; CHECK-NEXT: unresolved type ref
+; CHECK-NEXT: !"2.bad"
+; CHECK-NEXT: !MDDerivedType(tag: DW_TAG_pointer_type
+; CHECK-SAME: baseType: !"2.bad"
+; CHECK-NOT: unresolved
diff --git a/test/Verifier/frameescape.ll b/test/Verifier/frameescape.ll
index 54e0db4..1fb9387 100644
--- a/test/Verifier/frameescape.ll
+++ b/test/Verifier/frameescape.ll
@@ -5,8 +5,8 @@ declare i8* @llvm.framerecover(i8*, i8*, i32)
define internal void @f() {
%a = alloca i8
- call void (...)* @llvm.frameescape(i8* %a)
- call void (...)* @llvm.frameescape(i8* %a)
+ call void (...) @llvm.frameescape(i8* %a)
+ call void (...) @llvm.frameescape(i8* %a)
ret void
}
; CHECK: multiple calls to llvm.frameescape in one function
@@ -16,7 +16,7 @@ entry:
%a = alloca i8
br label %not_entry
not_entry:
- call void (...)* @llvm.frameescape(i8* %a)
+ call void (...) @llvm.frameescape(i8* %a)
ret void
}
; CHECK: llvm.frameescape used outside of entry block
@@ -51,14 +51,14 @@ define internal void @k(i32 %n) {
define internal void @l(i8* %b) {
%a = alloca i8
- call void (...)* @llvm.frameescape(i8* %a, i8* %b)
+ call void (...) @llvm.frameescape(i8* %a, i8* %b)
ret void
}
; CHECK: llvm.frameescape only accepts static allocas
define internal void @m() {
%a = alloca i8
- call void (...)* @llvm.frameescape(i8* %a)
+ call void (...) @llvm.frameescape(i8* %a)
ret void
}
diff --git a/test/Verifier/inalloca-vararg.ll b/test/Verifier/inalloca-vararg.ll
index 5099fd1..428f89e 100644
--- a/test/Verifier/inalloca-vararg.ll
+++ b/test/Verifier/inalloca-vararg.ll
@@ -3,7 +3,7 @@
declare void @h(i32, ...)
define void @i() {
%args = alloca inalloca i32
- call void (i32, ...)* @h(i32 1, i32* inalloca %args, i32 3)
+ call void (i32, ...) @h(i32 1, i32* inalloca %args, i32 3)
; CHECK: inalloca isn't on the last argument!
ret void
}
diff --git a/test/Verifier/invalid-statepoint.ll b/test/Verifier/invalid-statepoint.ll
index 7000973..d3a5bb8 100644
--- a/test/Verifier/invalid-statepoint.ll
+++ b/test/Verifier/invalid-statepoint.ll
@@ -12,7 +12,7 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #0
define i32 addrspace(1)* @0(i32 addrspace(1)* %dparam) {
%a00 = load i32, i32 addrspace(1)* %dparam
- %to0 = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f0i1f(i1 ()* @return0i1, i32 9, i32 0, i2 0, i32 addrspace(1)* %dparam)
+ %to0 = call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f0i1f(i1 ()* @return0i1, i32 9, i32 0, i2 0, i32 addrspace(1)* %dparam)
%relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %to0, i32 0, i32 4)
ret i32 addrspace(1)* %relocate
}
diff --git a/test/Verifier/invalid-statepoint2.ll b/test/Verifier/invalid-statepoint2.ll
index 0d8b2a8..e3dd922 100644
--- a/test/Verifier/invalid-statepoint2.ll
+++ b/test/Verifier/invalid-statepoint2.ll
@@ -12,7 +12,7 @@ declare i32 @"personality_function"()
define i64 addrspace(1)* @test1(i8 addrspace(1)* %arg, i32 %val) gc "statepoint-example" {
entry:
%cast = bitcast i8 addrspace(1)* %arg to i64 addrspace(1)*
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 %val, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 %val, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
%reloc = call i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %safepoint_token, i32 9, i32 10)
ret i64 addrspace(1)* %reloc
}
diff --git a/test/Verifier/llvm.dbg.declare-address.ll b/test/Verifier/llvm.dbg.declare-address.ll
index f4a9366..0c23d6e 100644
--- a/test/Verifier/llvm.dbg.declare-address.ll
+++ b/test/Verifier/llvm.dbg.declare-address.ll
@@ -6,7 +6,7 @@
define void @foo(i32 %a) {
entry:
%s = alloca i32
- call void @llvm.dbg.declare(metadata !"", metadata !MDLocalVariable(tag: DW_TAG_arg_variable), metadata !MDExpression())
+ call void @llvm.dbg.declare(metadata !"", metadata !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !1), metadata !MDExpression()), !dbg !MDLocation(scope: !1)
ret void
}
@@ -14,3 +14,4 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !MDSubprogram()
diff --git a/test/Verifier/llvm.dbg.declare-expression.ll b/test/Verifier/llvm.dbg.declare-expression.ll
index 1121e43..0491fdc 100644
--- a/test/Verifier/llvm.dbg.declare-expression.ll
+++ b/test/Verifier/llvm.dbg.declare-expression.ll
@@ -6,7 +6,7 @@
define void @foo(i32 %a) {
entry:
%s = alloca i32
- call void @llvm.dbg.declare(metadata i32* %s, metadata !MDLocalVariable(tag: DW_TAG_arg_variable), metadata !"")
+ call void @llvm.dbg.declare(metadata i32* %s, metadata !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !1), metadata !"")
ret void
}
@@ -14,3 +14,4 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !MDSubprogram()
diff --git a/test/Verifier/llvm.dbg.declare-variable.ll b/test/Verifier/llvm.dbg.declare-variable.ll
index e038243..763f463 100644
--- a/test/Verifier/llvm.dbg.declare-variable.ll
+++ b/test/Verifier/llvm.dbg.declare-variable.ll
@@ -6,7 +6,7 @@
define void @foo(i32 %a) {
entry:
%s = alloca i32
- call void @llvm.dbg.declare(metadata i32* %s, metadata !"", metadata !MDExpression())
+ call void @llvm.dbg.declare(metadata i32* %s, metadata !"", metadata !MDExpression()), !dbg !MDLocation(scope: !1)
ret void
}
@@ -14,3 +14,4 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !MDSubprogram()
diff --git a/test/Verifier/llvm.dbg.intrinsic-dbg-attachment.ll b/test/Verifier/llvm.dbg.intrinsic-dbg-attachment.ll
new file mode 100644
index 0000000..3615960
--- /dev/null
+++ b/test/Verifier/llvm.dbg.intrinsic-dbg-attachment.ll
@@ -0,0 +1,61 @@
+; RUN: not llvm-as -disable-output <%s 2>&1 | FileCheck %s
+define void @foo() {
+entry:
+ call void @llvm.dbg.value(
+ metadata i8* undef,
+ i64 0,
+ metadata !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !1),
+ metadata !MDExpression())
+; CHECK-LABEL: llvm.dbg.value intrinsic requires a !dbg attachment
+; CHECK-NEXT: call void @llvm.dbg.value({{.*}})
+; CHECK-NEXT: label %entry
+; CHECK-NEXT: void ()* @foo
+
+ call void @llvm.dbg.declare(
+ metadata i8* undef,
+ metadata !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !1),
+ metadata !MDExpression())
+; CHECK-LABEL: llvm.dbg.declare intrinsic requires a !dbg attachment
+; CHECK-NEXT: call void @llvm.dbg.declare({{.*}})
+; CHECK-NEXT: label %entry
+; CHECK-NEXT: void ()* @foo
+
+ call void @llvm.dbg.value(
+ metadata i8* undef,
+ i64 0,
+ metadata !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !1),
+ metadata !MDExpression()),
+ !dbg !MDLocation(scope: !2)
+; CHECK-LABEL: mismatched subprogram between llvm.dbg.value variable and !dbg attachment
+; CHECK-NEXT: call void @llvm.dbg.value({{[^,]+, [^,]+}}, metadata ![[VAR:[0-9]+]], {{[^,]+}}), !dbg ![[LOC:[0-9]+]]
+; CHECK-NEXT: label %entry
+; CHECK-NEXT: void ()* @foo
+; CHECK-NEXT: ![[VAR]] = !MDLocalVariable({{.*}}scope: ![[VARSP:[0-9]+]]
+; CHECK-NEXT: ![[VARSP]] = !MDSubprogram(
+; CHECK-NEXT: ![[LOC]] = !MDLocation({{.*}}scope: ![[LOCSP:[0-9]+]]
+; CHECK-NEXT: ![[LOCSP]] = !MDSubprogram(
+
+ call void @llvm.dbg.declare(
+ metadata i8* undef,
+ metadata !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !1),
+ metadata !MDExpression()),
+ !dbg !MDLocation(scope: !2)
+; CHECK-LABEL: mismatched subprogram between llvm.dbg.declare variable and !dbg attachment
+; CHECK-NEXT: call void @llvm.dbg.declare({{[^,]+}}, metadata ![[VAR:[0-9]+]], {{.*[^,]+}}), !dbg ![[LOC:[0-9]+]]
+; CHECK-NEXT: label %entry
+; CHECK-NEXT: void ()* @foo
+; CHECK-NEXT: ![[VAR]] = !MDLocalVariable({{.*}}scope: ![[VARSP:[0-9]+]]
+; CHECK-NEXT: ![[VARSP]] = !MDSubprogram(
+; CHECK-NEXT: ![[LOC]] = !MDLocation({{.*}}scope: ![[LOCSP:[0-9]+]]
+; CHECK-NEXT: ![[LOCSP]] = !MDSubprogram(
+
+ ret void
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !MDSubprogram(name: "foo")
+!2 = !MDSubprogram(name: "bar")
diff --git a/test/Verifier/llvm.dbg.value-expression.ll b/test/Verifier/llvm.dbg.value-expression.ll
index c0d14a5..18350a1 100644
--- a/test/Verifier/llvm.dbg.value-expression.ll
+++ b/test/Verifier/llvm.dbg.value-expression.ll
@@ -6,7 +6,7 @@
define void @foo(i32 %a) {
entry:
%s = alloca i32
- call void @llvm.dbg.value(metadata i32* %s, i64 0, metadata !MDLocalVariable(tag: DW_TAG_arg_variable), metadata !"")
+ call void @llvm.dbg.value(metadata i32* %s, i64 0, metadata !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !1), metadata !""), !dbg !MDLocation(scope: !1)
ret void
}
@@ -14,3 +14,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !MDSubprogram()
diff --git a/test/Verifier/llvm.dbg.value-value.ll b/test/Verifier/llvm.dbg.value-value.ll
index b64febe..3d82be7 100644
--- a/test/Verifier/llvm.dbg.value-value.ll
+++ b/test/Verifier/llvm.dbg.value-value.ll
@@ -6,7 +6,7 @@
define void @foo(i32 %a) {
entry:
%s = alloca i32
- call void @llvm.dbg.value(metadata !"", i64 0, metadata !MDLocalVariable(tag: DW_TAG_arg_variable), metadata !MDExpression())
+ call void @llvm.dbg.value(metadata !"", i64 0, metadata !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !1), metadata !MDExpression()), !dbg !MDLocation(scope: !1)
ret void
}
@@ -14,3 +14,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !MDSubprogram()
diff --git a/test/Verifier/llvm.dbg.value-variable.ll b/test/Verifier/llvm.dbg.value-variable.ll
index a87a15c..ba6a687 100644
--- a/test/Verifier/llvm.dbg.value-variable.ll
+++ b/test/Verifier/llvm.dbg.value-variable.ll
@@ -6,7 +6,7 @@
define void @foo(i32 %a) {
entry:
%s = alloca i32
- call void @llvm.dbg.value(metadata i32* %s, i64 0, metadata !"", metadata !MDExpression())
+ call void @llvm.dbg.value(metadata i32* %s, i64 0, metadata !"", metadata !MDExpression()), !dbg !MDLocation(scope: !1)
ret void
}
@@ -14,3 +14,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !MDSubprogram()
diff --git a/test/Verifier/mdcompositetype-templateparams-tuple.ll b/test/Verifier/mdcompositetype-templateparams-tuple.ll
new file mode 100644
index 0000000..a2e2c78
--- /dev/null
+++ b/test/Verifier/mdcompositetype-templateparams-tuple.ll
@@ -0,0 +1,11 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: invalid template params
+; CHECK-NEXT: !2 = !MDCompositeType(
+; CHECK-SAME: templateParams: !1
+; CHECK-NEXT: !1 = !MDTemplateTypeParameter(
+
+!named = !{!0, !1, !2}
+!0 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!1 = !MDTemplateTypeParameter(name: "T", type: !0)
+!2 = !MDCompositeType(tag: DW_TAG_structure_type, name: "IntTy", size: 32, align: 32, templateParams: !1)
diff --git a/test/Verifier/mdcompositetype-templateparams.ll b/test/Verifier/mdcompositetype-templateparams.ll
new file mode 100644
index 0000000..72909e3
--- /dev/null
+++ b/test/Verifier/mdcompositetype-templateparams.ll
@@ -0,0 +1,12 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: invalid template parameter
+; CHECK-NEXT: !2 = !MDCompositeType(
+; CHECK-SAME: templateParams: !1
+; CHECK-NEXT: !1 = !{!0}
+; CHECK-NEXT: !0 = !MDBasicType(
+
+!named = !{!0, !1, !2}
+!0 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!1 = !{!0}
+!2 = !MDCompositeType(tag: DW_TAG_structure_type, name: "IntTy", size: 32, align: 32, templateParams: !1)
diff --git a/test/Verifier/musttail-invalid.ll b/test/Verifier/musttail-invalid.ll
index e5f9a40..78017db 100644
--- a/test/Verifier/musttail-invalid.ll
+++ b/test/Verifier/musttail-invalid.ll
@@ -26,7 +26,7 @@ define void @mismatched_intty(i32) {
declare void @mismatched_vararg_callee(i8*, ...)
define void @mismatched_vararg(i8*) {
; CHECK: mismatched varargs
- musttail call void (i8*, ...)* @mismatched_vararg_callee(i8* null)
+ musttail call void (i8*, ...) @mismatched_vararg_callee(i8* null)
ret void
}
diff --git a/test/Verifier/musttail-valid.ll b/test/Verifier/musttail-valid.ll
index bdc0c8c..150e949 100644
--- a/test/Verifier/musttail-valid.ll
+++ b/test/Verifier/musttail-valid.ll
@@ -17,13 +17,13 @@ define i32* @similar_ret_ptrty() {
declare x86_thiscallcc void @varargs_thiscall(i8*, ...)
define x86_thiscallcc void @varargs_thiscall_thunk(i8* %this, ...) {
- musttail call x86_thiscallcc void (i8*, ...)* @varargs_thiscall(i8* %this, ...)
+ musttail call x86_thiscallcc void (i8*, ...) @varargs_thiscall(i8* %this, ...)
ret void
}
declare x86_fastcallcc void @varargs_fastcall(i8*, ...)
define x86_fastcallcc void @varargs_fastcall_thunk(i8* %this, ...) {
- musttail call x86_fastcallcc void (i8*, ...)* @varargs_fastcall(i8* %this, ...)
+ musttail call x86_fastcallcc void (i8*, ...) @varargs_fastcall(i8* %this, ...)
ret void
}
@@ -32,7 +32,7 @@ define x86_thiscallcc void @varargs_thiscall_unreachable(i8* %this, ...) {
}
define x86_thiscallcc void @varargs_thiscall_ret_unreachable(i8* %this, ...) {
- musttail call x86_thiscallcc void (i8*, ...)* @varargs_thiscall(i8* %this, ...)
+ musttail call x86_thiscallcc void (i8*, ...) @varargs_thiscall(i8* %this, ...)
ret void
bb1:
ret void
diff --git a/test/Verifier/statepoint.ll b/test/Verifier/statepoint.ll
index 9342309..61d8b77 100644
--- a/test/Verifier/statepoint.ll
+++ b/test/Verifier/statepoint.ll
@@ -10,7 +10,7 @@ declare i32 @"personality_function"()
define i64 addrspace(1)* @test1(i8 addrspace(1)* %arg) gc "statepoint-example" {
entry:
%cast = bitcast i8 addrspace(1)* %arg to i64 addrspace(1)*
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
%reloc = call i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %safepoint_token, i32 9, i32 10)
;; It is perfectly legal to relocate the same value multiple times...
%reloc2 = call i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %safepoint_token, i32 9, i32 10)
@@ -39,7 +39,7 @@ notequal:
ret void
equal:
- %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
+ %safepoint_token = call i32 (void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
%reloc = call i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %safepoint_token, i32 9, i32 10)
call void undef(i64 addrspace(1)* %reloc)
ret void
diff --git a/test/Verifier/varargs-intrinsic.ll b/test/Verifier/varargs-intrinsic.ll
index 2fff1db..26fe61f 100644
--- a/test/Verifier/varargs-intrinsic.ll
+++ b/test/Verifier/varargs-intrinsic.ll
@@ -10,7 +10,7 @@ define void @foo1() {
}
define void @foo2() {
- call void (...)* @llvm.donothing(i64 0, i64 1)
+ call void (...) @llvm.donothing(i64 0, i64 1)
; CHECK: Intrinsic was not defined with variable arguments!
ret void
}
diff --git a/test/tools/llvm-objdump/AArch64/Inputs/print-mrs.obj.macho-aarch64 b/test/tools/llvm-objdump/AArch64/Inputs/print-mrs.obj.macho-aarch64
new file mode 100644
index 0000000..06cb13d
--- /dev/null
+++ b/test/tools/llvm-objdump/AArch64/Inputs/print-mrs.obj.macho-aarch64
Binary files differ
diff --git a/test/tools/llvm-objdump/AArch64/macho-print-mrs.test b/test/tools/llvm-objdump/AArch64/macho-print-mrs.test
new file mode 100644
index 0000000..cc1d14f
--- /dev/null
+++ b/test/tools/llvm-objdump/AArch64/macho-print-mrs.test
@@ -0,0 +1,3 @@
+RUN: llvm-objdump -d -m -no-show-raw-insn %p/Inputs/print-mrs.obj.macho-aarch64 | FileCheck %s
+
+CHECK: 0: mrs x0, S3_7_C15_C2_0
diff --git a/test/tools/llvm-objdump/X86/Inputs/Objc1.32bit.exe.macho-i386 b/test/tools/llvm-objdump/X86/Inputs/Objc1.32bit.exe.macho-i386
new file mode 100755
index 0000000..72b7ea8
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/Objc1.32bit.exe.macho-i386
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/Objc1.32bit.obj.macho-i386 b/test/tools/llvm-objdump/X86/Inputs/Objc1.32bit.obj.macho-i386
new file mode 100644
index 0000000..7f62ad1
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/Objc1.32bit.obj.macho-i386
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/Objc2.32bit.exe.macho-i386 b/test/tools/llvm-objdump/X86/Inputs/Objc2.32bit.exe.macho-i386
new file mode 100755
index 0000000..b44c7dc
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/Objc2.32bit.exe.macho-i386
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/Objc2.32bit.obj.macho-i386 b/test/tools/llvm-objdump/X86/Inputs/Objc2.32bit.obj.macho-i386
new file mode 100644
index 0000000..5144964
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/Objc2.32bit.obj.macho-i386
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/Objc2.64bit.exe.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/Objc2.64bit.exe.macho-x86_64
new file mode 100755
index 0000000..5cee077
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/Objc2.64bit.exe.macho-x86_64
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/Objc2.64bit.obj.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/Objc2.64bit.obj.macho-x86_64
new file mode 100644
index 0000000..5734780
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/Objc2.64bit.obj.macho-x86_64
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/macho-objc-meta-data.test b/test/tools/llvm-objdump/X86/macho-objc-meta-data.test
new file mode 100644
index 0000000..f4abf6c
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/macho-objc-meta-data.test
@@ -0,0 +1,1039 @@
+# RUN: llvm-objdump -m -objc-meta-data %p/Inputs/Objc2.64bit.exe.macho-x86_64 | FileCheck %s -check-prefix=OBJC2_64BIT_EXE
+# RUN: llvm-objdump -m -objc-meta-data %p/Inputs/Objc2.64bit.obj.macho-x86_64 | FileCheck %s -check-prefix=OBJC2_64BIT_OBJ
+# RUN: llvm-objdump -m -objc-meta-data %p/Inputs/Objc2.32bit.exe.macho-i386 | FileCheck %s -check-prefix=OBJC2_32BIT_EXE
+# RUN: llvm-objdump -m -objc-meta-data %p/Inputs/Objc2.32bit.obj.macho-i386 | FileCheck %s -check-prefix=OBJC2_32BIT_OBJ
+# RUN: llvm-objdump -m -objc-meta-data %p/Inputs/Objc1.32bit.exe.macho-i386 | FileCheck %s -check-prefix=OBJC1_32BIT_EXE
+# RUN: llvm-objdump -m -objc-meta-data %p/Inputs/Objc1.32bit.obj.macho-i386 | FileCheck %s -check-prefix=OBJC1_32BIT_OBJ
+# RUN: llvm-objdump -m -section __OBJC,__protocol %p/Inputs/Objc1.32bit.exe.macho-i386 | FileCheck %s -check-prefix=PROTOCOL
+
+OBJC2_64BIT_EXE: Contents of (__DATA,__objc_classlist) section
+OBJC2_64BIT_EXE: 0000000100002028 0x1000029f0
+OBJC2_64BIT_EXE: isa 0x100002a18
+OBJC2_64BIT_EXE: superclass 0x0
+OBJC2_64BIT_EXE: cache 0x0
+OBJC2_64BIT_EXE: vtable 0x0
+OBJC2_64BIT_EXE: data 0x1000020d0 (struct class_ro_t *)
+OBJC2_64BIT_EXE: flags 0x80
+OBJC2_64BIT_EXE: instanceStart 100
+OBJC2_64BIT_EXE: instanceSize 100
+OBJC2_64BIT_EXE: reserved 0x0
+OBJC2_64BIT_EXE: ivarLayout 0x0
+OBJC2_64BIT_EXE: name 0x100001b0e ViewController
+OBJC2_64BIT_EXE: baseMethods 0x100002098 (struct method_list_t *)
+OBJC2_64BIT_EXE: entsize 24
+OBJC2_64BIT_EXE: count 2
+OBJC2_64BIT_EXE: name 0x1000014ee viewDidLoad
+OBJC2_64BIT_EXE: types 0x100001b48 v16@0:8
+OBJC2_64BIT_EXE: imp 0x100001350
+OBJC2_64BIT_EXE: name 0x1000014fa setRepresentedObject:
+OBJC2_64BIT_EXE: types 0x100001b50 v24@0:8@16
+OBJC2_64BIT_EXE: imp 0x100001390
+OBJC2_64BIT_EXE: baseProtocols 0x0
+OBJC2_64BIT_EXE: ivars 0x0
+OBJC2_64BIT_EXE: weakIvarLayout 0x0
+OBJC2_64BIT_EXE: baseProperties 0x0
+OBJC2_64BIT_EXE: Meta Class
+OBJC2_64BIT_EXE: isa 0x0
+OBJC2_64BIT_EXE: superclass 0x0
+OBJC2_64BIT_EXE: cache 0x0
+OBJC2_64BIT_EXE: vtable 0x0
+OBJC2_64BIT_EXE: data 0x100002050 (struct class_ro_t *)
+OBJC2_64BIT_EXE: flags 0x81 RO_META
+OBJC2_64BIT_EXE: instanceStart 40
+OBJC2_64BIT_EXE: instanceSize 40
+OBJC2_64BIT_EXE: reserved 0x0
+OBJC2_64BIT_EXE: ivarLayout 0x0
+OBJC2_64BIT_EXE: name 0x100001b0e ViewController
+OBJC2_64BIT_EXE: baseMethods 0x0 (struct method_list_t *)
+OBJC2_64BIT_EXE: baseProtocols 0x0
+OBJC2_64BIT_EXE: ivars 0x0
+OBJC2_64BIT_EXE: weakIvarLayout 0x0
+OBJC2_64BIT_EXE: baseProperties 0x0
+OBJC2_64BIT_EXE: 0000000100002030 0x100002a68
+OBJC2_64BIT_EXE: isa 0x100002a40
+OBJC2_64BIT_EXE: superclass 0x0
+OBJC2_64BIT_EXE: cache 0x0
+OBJC2_64BIT_EXE: vtable 0x0
+OBJC2_64BIT_EXE: data 0x100002990 (struct class_ro_t *)
+OBJC2_64BIT_EXE: flags 0x80
+OBJC2_64BIT_EXE: instanceStart 8
+OBJC2_64BIT_EXE: instanceSize 8
+OBJC2_64BIT_EXE: reserved 0x0
+OBJC2_64BIT_EXE: ivarLayout 0x0
+OBJC2_64BIT_EXE: name 0x100001b1d AppDelegate
+OBJC2_64BIT_EXE: baseMethods 0x100002910 (struct method_list_t *)
+OBJC2_64BIT_EXE: entsize 24
+OBJC2_64BIT_EXE: count 2
+OBJC2_64BIT_EXE: name 0x100001885 applicationDidFinishLaunching:
+OBJC2_64BIT_EXE: types 0x100001b50 v24@0:8@16
+OBJC2_64BIT_EXE: imp 0x100001430
+OBJC2_64BIT_EXE: name 0x100001999 applicationWillTerminate:
+OBJC2_64BIT_EXE: types 0x100001b50 v24@0:8@16
+OBJC2_64BIT_EXE: imp 0x100001470
+OBJC2_64BIT_EXE: baseProtocols 0x1000028b0
+OBJC2_64BIT_EXE: count 1
+OBJC2_64BIT_EXE: list[0] 0x100002ae0 (struct protocol_t *)
+OBJC2_64BIT_EXE: isa 0x0
+OBJC2_64BIT_EXE: name 0x100001b29 NSApplicationDelegate
+OBJC2_64BIT_EXE: protocols 0x1000023f0
+OBJC2_64BIT_EXE: instanceMethods 0x0 (struct method_list_t *)
+OBJC2_64BIT_EXE: classMethods 0x0 (struct method_list_t *)
+OBJC2_64BIT_EXE: optionalInstanceMethods 0x100002408
+OBJC2_64BIT_EXE: optionalClassMethods 0x0
+OBJC2_64BIT_EXE: instanceProperties 0x0
+OBJC2_64BIT_EXE: ivars 0x0
+OBJC2_64BIT_EXE: weakIvarLayout 0x0
+OBJC2_64BIT_EXE: baseProperties 0x100002948
+OBJC2_64BIT_EXE: entsize 16
+OBJC2_64BIT_EXE: count 4
+OBJC2_64BIT_EXE: name 0x100001f25 hash
+OBJC2_64BIT_EXE: attributes 0x100001f2a TQ,R
+OBJC2_64BIT_EXE: name 0x100001f2f superclass
+OBJC2_64BIT_EXE: attributes 0x100001f3a T#,R
+OBJC2_64BIT_EXE: name 0x100001f3f description
+OBJC2_64BIT_EXE: attributes 0x100001f4b T@"NSString",R,C
+OBJC2_64BIT_EXE: name 0x100001f5c debugDescription
+OBJC2_64BIT_EXE: attributes 0x100001f4b T@"NSString",R,C
+OBJC2_64BIT_EXE: Meta Class
+OBJC2_64BIT_EXE: isa 0x0
+OBJC2_64BIT_EXE: superclass 0x0
+OBJC2_64BIT_EXE: cache 0x0
+OBJC2_64BIT_EXE: vtable 0x0
+OBJC2_64BIT_EXE: data 0x1000028c8 (struct class_ro_t *)
+OBJC2_64BIT_EXE: flags 0x81 RO_META
+OBJC2_64BIT_EXE: instanceStart 40
+OBJC2_64BIT_EXE: instanceSize 40
+OBJC2_64BIT_EXE: reserved 0x0
+OBJC2_64BIT_EXE: ivarLayout 0x0
+OBJC2_64BIT_EXE: name 0x100001b1d AppDelegate
+OBJC2_64BIT_EXE: baseMethods 0x0 (struct method_list_t *)
+OBJC2_64BIT_EXE: baseProtocols 0x1000028b0
+OBJC2_64BIT_EXE: count 1
+OBJC2_64BIT_EXE: list[0] 0x100002ae0 (struct protocol_t *)
+OBJC2_64BIT_EXE: isa 0x0
+OBJC2_64BIT_EXE: name 0x100001b29 NSApplicationDelegate
+OBJC2_64BIT_EXE: protocols 0x1000023f0
+OBJC2_64BIT_EXE: instanceMethods 0x0 (struct method_list_t *)
+OBJC2_64BIT_EXE: classMethods 0x0 (struct method_list_t *)
+OBJC2_64BIT_EXE: optionalInstanceMethods 0x100002408
+OBJC2_64BIT_EXE: optionalClassMethods 0x0
+OBJC2_64BIT_EXE: instanceProperties 0x0
+OBJC2_64BIT_EXE: ivars 0x0
+OBJC2_64BIT_EXE: weakIvarLayout 0x0
+OBJC2_64BIT_EXE: baseProperties 0x0
+OBJC2_64BIT_EXE: Contents of (__DATA,__objc_superrefs) section
+OBJC2_64BIT_EXE: 00000001000029e8 0x1000029f0
+OBJC2_64BIT_EXE: Contents of (__DATA,__objc_protolist) section
+OBJC2_64BIT_EXE: 0000000100002038 0x100002a90
+OBJC2_64BIT_EXE: 0000000100002040 0x100002ae0
+OBJC2_64BIT_EXE: Contents of (__DATA,__objc_imageinfo) section
+OBJC2_64BIT_EXE: version 0
+OBJC2_64BIT_EXE: flags 0x0
+
+OBJC2_64BIT_OBJ: Contents of (__DATA,__objc_classlist) section
+OBJC2_64BIT_OBJ: 0000000000001b00 0x1ad8 _OBJC_CLASS_$_AppDelegate
+OBJC2_64BIT_OBJ: isa 0x0 _OBJC_METACLASS_$_AppDelegate
+OBJC2_64BIT_OBJ: superclass 0x0 _OBJC_CLASS_$_NSObject
+OBJC2_64BIT_OBJ: cache 0x0 __objc_empty_cache
+OBJC2_64BIT_OBJ: vtable 0x0
+OBJC2_64BIT_OBJ: data l_OBJC_CLASS_RO_$_AppDelegate (struct class_ro_t *)
+OBJC2_64BIT_OBJ: flags 0x80
+OBJC2_64BIT_OBJ: instanceStart 8
+OBJC2_64BIT_OBJ: instanceSize 8
+OBJC2_64BIT_OBJ: reserved 0x0
+OBJC2_64BIT_OBJ: ivarLayout 0x0
+OBJC2_64BIT_OBJ: name 0x6f8 AppDelegate
+OBJC2_64BIT_OBJ: baseMethods l_OBJC_$_INSTANCE_METHODS_AppDelegate (struct method_list_t *)
+OBJC2_64BIT_OBJ: entsize 24
+OBJC2_64BIT_OBJ: count 2
+OBJC2_64BIT_OBJ: name 0xa98 applicationDidFinishLaunching:
+OBJC2_64BIT_OBJ: types 0x102e v24@0:8@16
+OBJC2_64BIT_OBJ: imp -[AppDelegate applicationDidFinishLaunching:]
+OBJC2_64BIT_OBJ: name 0xbac applicationWillTerminate:
+OBJC2_64BIT_OBJ: types 0x102e v24@0:8@16
+OBJC2_64BIT_OBJ: imp -[AppDelegate applicationWillTerminate:]
+OBJC2_64BIT_OBJ: baseProtocols l_OBJC_CLASS_PROTOCOLS_$_AppDelegate
+OBJC2_64BIT_OBJ: count 1
+OBJC2_64BIT_OBJ: list[0] l_OBJC_PROTOCOL_$_NSApplicationDelegate (struct protocol_t *)
+OBJC2_64BIT_OBJ: isa 0x0
+OBJC2_64BIT_OBJ: name 0x704 NSApplicationDelegate
+OBJC2_64BIT_OBJ: protocols 0x0
+OBJC2_64BIT_OBJ: instanceMethods 0x0 (struct method_list_t *)
+OBJC2_64BIT_OBJ: classMethods 0x0 (struct method_list_t *)
+OBJC2_64BIT_OBJ: optionalInstanceMethods 0x0
+OBJC2_64BIT_OBJ: optionalClassMethods 0x0
+OBJC2_64BIT_OBJ: instanceProperties 0x0
+OBJC2_64BIT_OBJ: ivars 0x0
+OBJC2_64BIT_OBJ: weakIvarLayout 0x0
+OBJC2_64BIT_OBJ: baseProperties l_OBJC_$_PROP_LIST_AppDelegate
+OBJC2_64BIT_OBJ: entsize 16
+OBJC2_64BIT_OBJ: count 4
+OBJC2_64BIT_OBJ: name 0x19b8 hash
+OBJC2_64BIT_OBJ: attributes 0x19bd TQ,R
+OBJC2_64BIT_OBJ: name 0x19c2 superclass
+OBJC2_64BIT_OBJ: attributes 0x19cd T#,R
+OBJC2_64BIT_OBJ: name 0x19d2 description
+OBJC2_64BIT_OBJ: attributes 0x19de T@"NSString",R,C
+OBJC2_64BIT_OBJ: name 0x19ef debugDescription
+OBJC2_64BIT_OBJ: attributes 0x19de T@"NSString",R,C
+OBJC2_64BIT_OBJ: Meta Class
+OBJC2_64BIT_OBJ: isa 0x0 _OBJC_METACLASS_$_NSObject
+OBJC2_64BIT_OBJ: superclass 0x0 _OBJC_METACLASS_$_NSObject
+OBJC2_64BIT_OBJ: cache 0x0 __objc_empty_cache
+OBJC2_64BIT_OBJ: vtable 0x0
+OBJC2_64BIT_OBJ: data l_OBJC_METACLASS_RO_$_AppDelegate (struct class_ro_t *)
+OBJC2_64BIT_OBJ: flags 0x81 RO_META
+OBJC2_64BIT_OBJ: instanceStart 40
+OBJC2_64BIT_OBJ: instanceSize 40
+OBJC2_64BIT_OBJ: reserved 0x0
+OBJC2_64BIT_OBJ: ivarLayout 0x0
+OBJC2_64BIT_OBJ: name 0x6f8 AppDelegate
+OBJC2_64BIT_OBJ: baseMethods 0x0 (struct method_list_t *)
+OBJC2_64BIT_OBJ: baseProtocols l_OBJC_CLASS_PROTOCOLS_$_AppDelegate
+OBJC2_64BIT_OBJ: count 1
+OBJC2_64BIT_OBJ: list[0] l_OBJC_PROTOCOL_$_NSApplicationDelegate (struct protocol_t *)
+OBJC2_64BIT_OBJ: isa 0x0
+OBJC2_64BIT_OBJ: name 0x704 NSApplicationDelegate
+OBJC2_64BIT_OBJ: protocols 0x0
+OBJC2_64BIT_OBJ: instanceMethods 0x0 (struct method_list_t *)
+OBJC2_64BIT_OBJ: classMethods 0x0 (struct method_list_t *)
+OBJC2_64BIT_OBJ: optionalInstanceMethods 0x0
+OBJC2_64BIT_OBJ: optionalClassMethods 0x0
+OBJC2_64BIT_OBJ: instanceProperties 0x0
+OBJC2_64BIT_OBJ: ivars 0x0
+OBJC2_64BIT_OBJ: weakIvarLayout 0x0
+OBJC2_64BIT_OBJ: baseProperties 0x0
+OBJC2_64BIT_OBJ: Contents of (__DATA,__objc_protolist) section
+OBJC2_64BIT_OBJ: 0000000000001aa0 0x1a00 l_OBJC_PROTOCOL_$_NSObject
+OBJC2_64BIT_OBJ: 0000000000001aa8 0x1a50 l_OBJC_PROTOCOL_$_NSApplicationDelegate
+OBJC2_64BIT_OBJ: Contents of (__DATA,__objc_imageinfo) section
+OBJC2_64BIT_OBJ: version 0
+OBJC2_64BIT_OBJ: flags 0x0
+
+OBJC2_32BIT_EXE: Objective-C segment
+OBJC2_32BIT_EXE: Contents of (__DATA,__objc_classlist) section
+OBJC2_32BIT_EXE: 00006068 0x6a84
+OBJC2_32BIT_EXE: isa 0x6a70
+OBJC2_32BIT_EXE: superclass 0x0
+OBJC2_32BIT_EXE: cache 0x0
+OBJC2_32BIT_EXE: vtable 0x0
+OBJC2_32BIT_EXE: data 0x66e0 (struct class_ro_t *)
+OBJC2_32BIT_EXE: flags 0x184 RO_HAS_CXX_STRUCTORS
+OBJC2_32BIT_EXE: instanceStart 4
+OBJC2_32BIT_EXE: instanceSize 8
+OBJC2_32BIT_EXE: ivarLayout 0x52c2
+OBJC2_32BIT_EXE: layout map: 0x01
+OBJC2_32BIT_EXE: name 0x5279 AppDelegate
+OBJC2_32BIT_EXE: baseMethods 0x6614 (struct method_list_t *)
+OBJC2_32BIT_EXE: entsize 12
+OBJC2_32BIT_EXE: count 10
+OBJC2_32BIT_EXE: name 0x454c application:didFinishLaunchingWithOptions:
+OBJC2_32BIT_EXE: types 0x562b c16@0:4@8@12
+OBJC2_32BIT_EXE: imp 0x23c0
+OBJC2_32BIT_EXE: name 0x4593 applicationWillResignActive:
+OBJC2_32BIT_EXE: types 0x5608 v12@0:4@8
+OBJC2_32BIT_EXE: imp 0x25f0
+OBJC2_32BIT_EXE: name 0x4a6a applicationDidEnterBackground:
+OBJC2_32BIT_EXE: types 0x5608 v12@0:4@8
+OBJC2_32BIT_EXE: imp 0x2640
+OBJC2_32BIT_EXE: name 0x4a89 applicationWillEnterForeground:
+OBJC2_32BIT_EXE: types 0x5608 v12@0:4@8
+OBJC2_32BIT_EXE: imp 0x2690
+OBJC2_32BIT_EXE: name 0x4577 applicationDidBecomeActive:
+OBJC2_32BIT_EXE: types 0x5608 v12@0:4@8
+OBJC2_32BIT_EXE: imp 0x26e0
+OBJC2_32BIT_EXE: name 0x463e applicationWillTerminate:
+OBJC2_32BIT_EXE: types 0x5608 v12@0:4@8
+OBJC2_32BIT_EXE: imp 0x2730
+OBJC2_32BIT_EXE: name 0x42da splitViewController:collapseSecondaryViewController:ontoPrimaryViewController:
+OBJC2_32BIT_EXE: types 0x5351 c20@0:4@8@12@16
+OBJC2_32BIT_EXE: imp 0x2780
+OBJC2_32BIT_EXE: name 0x4e21 .cxx_destruct
+OBJC2_32BIT_EXE: types 0x5d44 v8@0:4
+OBJC2_32BIT_EXE: imp 0x2a70
+OBJC2_32BIT_EXE: name 0x40fc window
+OBJC2_32BIT_EXE: types 0x5c80 @8@0:4
+OBJC2_32BIT_EXE: imp 0x2a00
+OBJC2_32BIT_EXE: name 0x4d1a setWindow:
+OBJC2_32BIT_EXE: types 0x5608 v12@0:4@8
+OBJC2_32BIT_EXE: imp 0x2a30
+OBJC2_32BIT_EXE: baseProtocols 0x65dc
+OBJC2_32BIT_EXE: count 2
+OBJC2_32BIT_EXE: list[0] 0x6ae8 (struct protocol_t *)
+OBJC2_32BIT_EXE: isa 0x0
+OBJC2_32BIT_EXE: name 0x5285 UISplitViewControllerDelegate
+OBJC2_32BIT_EXE: protocols 0x0
+OBJC2_32BIT_EXE: instanceMethods 0x0 (struct method_list_t *)
+OBJC2_32BIT_EXE: classMethods 0x0 (struct method_list_t *)
+OBJC2_32BIT_EXE: optionalInstanceMethods 0x6088
+OBJC2_32BIT_EXE: optionalClassMethods 0x0
+OBJC2_32BIT_EXE: instanceProperties 0x0
+OBJC2_32BIT_EXE: list[1] 0x6b40 (struct protocol_t *)
+OBJC2_32BIT_EXE: isa 0x0
+OBJC2_32BIT_EXE: name 0x52a3 UIApplicationDelegate
+OBJC2_32BIT_EXE: protocols 0x62e8
+OBJC2_32BIT_EXE: instanceMethods 0x0 (struct method_list_t *)
+OBJC2_32BIT_EXE: classMethods 0x0 (struct method_list_t *)
+OBJC2_32BIT_EXE: optionalInstanceMethods 0x62f4
+OBJC2_32BIT_EXE: optionalClassMethods 0x0
+OBJC2_32BIT_EXE: instanceProperties 0x6518
+OBJC2_32BIT_EXE: ivars 0x6694
+OBJC2_32BIT_EXE: entsize 20
+OBJC2_32BIT_EXE: count 1
+OBJC2_32BIT_EXE: offset 0x6a5c 4
+OBJC2_32BIT_EXE: name 0x4e2f _window
+OBJC2_32BIT_EXE: type 0x5d4b @"UIWindow"
+OBJC2_32BIT_EXE: alignment 2
+OBJC2_32BIT_EXE: size 4
+OBJC2_32BIT_EXE: weakIvarLayout 0x0
+OBJC2_32BIT_EXE: baseProperties 0x66b0
+OBJC2_32BIT_EXE: entsize 8
+OBJC2_32BIT_EXE: count 5
+OBJC2_32BIT_EXE: name 0x5df3 window
+OBJC2_32BIT_EXE: attributes 0x5e0b T@"UIWindow",&,N,V_window
+OBJC2_32BIT_EXE: name 0x5dab hash
+OBJC2_32BIT_EXE: attributes 0x5db0 TI,R
+OBJC2_32BIT_EXE: name 0x5db5 superclass
+OBJC2_32BIT_EXE: attributes 0x5dc0 T#,R
+OBJC2_32BIT_EXE: name 0x5dc5 description
+OBJC2_32BIT_EXE: attributes 0x5dd1 T@"NSString",R,C
+OBJC2_32BIT_EXE: name 0x5de2 debugDescription
+OBJC2_32BIT_EXE: attributes 0x5dd1 T@"NSString",R,C
+OBJC2_32BIT_EXE: Meta Class
+OBJC2_32BIT_EXE: isa 0x0
+OBJC2_32BIT_EXE: superclass 0x0
+OBJC2_32BIT_EXE: cache 0x0
+OBJC2_32BIT_EXE: vtable 0x0
+OBJC2_32BIT_EXE: data 0x65ec (struct class_ro_t *)
+OBJC2_32BIT_EXE: flags 0x185 RO_META RO_HAS_CXX_STRUCTORS
+OBJC2_32BIT_EXE: instanceStart 20
+OBJC2_32BIT_EXE: instanceSize 20
+OBJC2_32BIT_EXE: ivarLayout 0x0
+OBJC2_32BIT_EXE: name 0x5279 AppDelegate
+OBJC2_32BIT_EXE: baseMethods 0x0 (struct method_list_t *)
+OBJC2_32BIT_EXE: baseProtocols 0x65dc
+OBJC2_32BIT_EXE: count 2
+OBJC2_32BIT_EXE: list[0] 0x6ae8 (struct protocol_t *)
+OBJC2_32BIT_EXE: isa 0x0
+OBJC2_32BIT_EXE: name 0x5285 UISplitViewControllerDelegate
+OBJC2_32BIT_EXE: protocols 0x0
+OBJC2_32BIT_EXE: instanceMethods 0x0 (struct method_list_t *)
+OBJC2_32BIT_EXE: classMethods 0x0 (struct method_list_t *)
+OBJC2_32BIT_EXE: optionalInstanceMethods 0x6088
+OBJC2_32BIT_EXE: optionalClassMethods 0x0
+OBJC2_32BIT_EXE: instanceProperties 0x0
+OBJC2_32BIT_EXE: list[1] 0x6b40 (struct protocol_t *)
+OBJC2_32BIT_EXE: isa 0x0
+OBJC2_32BIT_EXE: name 0x52a3 UIApplicationDelegate
+OBJC2_32BIT_EXE: protocols 0x62e8
+OBJC2_32BIT_EXE: instanceMethods 0x0 (struct method_list_t *)
+OBJC2_32BIT_EXE: classMethods 0x0 (struct method_list_t *)
+OBJC2_32BIT_EXE: optionalInstanceMethods 0x62f4
+OBJC2_32BIT_EXE: optionalClassMethods 0x0
+OBJC2_32BIT_EXE: instanceProperties 0x6518
+OBJC2_32BIT_EXE: ivars 0x0
+OBJC2_32BIT_EXE: weakIvarLayout 0x0
+OBJC2_32BIT_EXE: baseProperties 0x0
+OBJC2_32BIT_EXE: 0000606c 0x6a98
+OBJC2_32BIT_EXE: isa 0x6aac
+OBJC2_32BIT_EXE: superclass 0x0
+OBJC2_32BIT_EXE: cache 0x0
+OBJC2_32BIT_EXE: vtable 0x0
+OBJC2_32BIT_EXE: data 0x6838 (struct class_ro_t *)
+OBJC2_32BIT_EXE: flags 0x184 RO_HAS_CXX_STRUCTORS
+OBJC2_32BIT_EXE: instanceStart 4
+OBJC2_32BIT_EXE: instanceSize 12
+OBJC2_32BIT_EXE: ivarLayout 0x52d9
+OBJC2_32BIT_EXE: layout map: 0x02
+OBJC2_32BIT_EXE: name 0x52c4 MasterViewController
+OBJC2_32BIT_EXE: baseMethods 0x6730 (struct method_list_t *)
+OBJC2_32BIT_EXE: entsize 12
+OBJC2_32BIT_EXE: count 15
+OBJC2_32BIT_EXE: name 0x4e37 awakeFromNib
+OBJC2_32BIT_EXE: types 0x5d44 v8@0:4
+OBJC2_32BIT_EXE: imp 0x2ab0
+OBJC2_32BIT_EXE: name 0x4ea2 viewDidLoad
+OBJC2_32BIT_EXE: types 0x5d44 v8@0:4
+OBJC2_32BIT_EXE: imp 0x2c20
+OBJC2_32BIT_EXE: name 0x4f43 didReceiveMemoryWarning
+OBJC2_32BIT_EXE: types 0x5d44 v8@0:4
+OBJC2_32BIT_EXE: imp 0x2e80
+OBJC2_32BIT_EXE: name 0x4ec3 insertNewObject:
+OBJC2_32BIT_EXE: types 0x5608 v12@0:4@8
+OBJC2_32BIT_EXE: imp 0x2ed0
+OBJC2_32BIT_EXE: name 0x5119 prepareForSegue:sender:
+OBJC2_32BIT_EXE: types 0x57b1 v16@0:4@8@12
+OBJC2_32BIT_EXE: imp 0x3160
+OBJC2_32BIT_EXE: name 0x5131 numberOfSectionsInTableView:
+OBJC2_32BIT_EXE: types 0x5326 i12@0:4@8
+OBJC2_32BIT_EXE: imp 0x34c0
+OBJC2_32BIT_EXE: name 0x514e tableView:numberOfRowsInSection:
+OBJC2_32BIT_EXE: types 0x5d57 i16@0:4@8i12
+OBJC2_32BIT_EXE: imp 0x3520
+OBJC2_32BIT_EXE: name 0x516f tableView:cellForRowAtIndexPath:
+OBJC2_32BIT_EXE: types 0x5422 @16@0:4@8@12
+OBJC2_32BIT_EXE: imp 0x35e0
+OBJC2_32BIT_EXE: name 0x5190 tableView:canEditRowAtIndexPath:
+OBJC2_32BIT_EXE: types 0x562b c16@0:4@8@12
+OBJC2_32BIT_EXE: imp 0x37e0
+OBJC2_32BIT_EXE: name 0x51b1 tableView:commitEditingStyle:forRowAtIndexPath:
+OBJC2_32BIT_EXE: types 0x5d64 v20@0:4@8i12@16
+OBJC2_32BIT_EXE: imp 0x3880
+OBJC2_32BIT_EXE: name 0x4e21 .cxx_destruct
+OBJC2_32BIT_EXE: types 0x5d44 v8@0:4
+OBJC2_32BIT_EXE: imp 0x3b40
+OBJC2_32BIT_EXE: name 0x51e1 detailViewController
+OBJC2_32BIT_EXE: types 0x5c80 @8@0:4
+OBJC2_32BIT_EXE: imp 0x3a30
+OBJC2_32BIT_EXE: name 0x4f2a setDetailViewController:
+OBJC2_32BIT_EXE: types 0x5608 v12@0:4@8
+OBJC2_32BIT_EXE: imp 0x3a60
+OBJC2_32BIT_EXE: name 0x4f5b objects
+OBJC2_32BIT_EXE: types 0x5c80 @8@0:4
+OBJC2_32BIT_EXE: imp 0x3aa0
+OBJC2_32BIT_EXE: name 0x4f68 setObjects:
+OBJC2_32BIT_EXE: types 0x5608 v12@0:4@8
+OBJC2_32BIT_EXE: imp 0x3af0
+OBJC2_32BIT_EXE: baseProtocols 0x0
+OBJC2_32BIT_EXE: ivars 0x67ec
+OBJC2_32BIT_EXE: entsize 20
+OBJC2_32BIT_EXE: count 2
+OBJC2_32BIT_EXE: offset 0x6a60 4
+OBJC2_32BIT_EXE: name 0x51f6 _detailViewController
+OBJC2_32BIT_EXE: type 0x5d74 @"DetailViewController"
+OBJC2_32BIT_EXE: alignment 2
+OBJC2_32BIT_EXE: size 4
+OBJC2_32BIT_EXE: offset 0x6a64 8
+OBJC2_32BIT_EXE: name 0x520c _objects
+OBJC2_32BIT_EXE: type 0x5d8c @"NSMutableArray"
+OBJC2_32BIT_EXE: alignment 2
+OBJC2_32BIT_EXE: size 4
+OBJC2_32BIT_EXE: weakIvarLayout 0x0
+OBJC2_32BIT_EXE: baseProperties 0x6820
+OBJC2_32BIT_EXE: entsize 8
+OBJC2_32BIT_EXE: count 2
+OBJC2_32BIT_EXE: name 0x5e35 detailViewController
+OBJC2_32BIT_EXE: attributes 0x5e4a T@"DetailViewController",&,N,V_detailViewController
+OBJC2_32BIT_EXE: name 0x5e7e objects
+OBJC2_32BIT_EXE: attributes 0x5e86 T@"NSMutableArray",&,V_objects
+OBJC2_32BIT_EXE: Meta Class
+OBJC2_32BIT_EXE: isa 0x0
+OBJC2_32BIT_EXE: superclass 0x0
+OBJC2_32BIT_EXE: cache 0x0
+OBJC2_32BIT_EXE: vtable 0x0
+OBJC2_32BIT_EXE: data 0x6708 (struct class_ro_t *)
+OBJC2_32BIT_EXE: flags 0x185 RO_META RO_HAS_CXX_STRUCTORS
+OBJC2_32BIT_EXE: instanceStart 20
+OBJC2_32BIT_EXE: instanceSize 20
+OBJC2_32BIT_EXE: ivarLayout 0x0
+OBJC2_32BIT_EXE: name 0x52c4 MasterViewController
+OBJC2_32BIT_EXE: baseMethods 0x0 (struct method_list_t *)
+OBJC2_32BIT_EXE: baseProtocols 0x0
+OBJC2_32BIT_EXE: ivars 0x0
+OBJC2_32BIT_EXE: weakIvarLayout 0x0
+OBJC2_32BIT_EXE: baseProperties 0x0
+OBJC2_32BIT_EXE: 00006070 0x6ac0
+OBJC2_32BIT_EXE: isa 0x6ad4
+OBJC2_32BIT_EXE: superclass 0x0
+OBJC2_32BIT_EXE: cache 0x0
+OBJC2_32BIT_EXE: vtable 0x0
+OBJC2_32BIT_EXE: data 0x6938 (struct class_ro_t *)
+OBJC2_32BIT_EXE: flags 0x184 RO_HAS_CXX_STRUCTORS
+OBJC2_32BIT_EXE: instanceStart 4
+OBJC2_32BIT_EXE: instanceSize 12
+OBJC2_32BIT_EXE: ivarLayout 0x52f0
+OBJC2_32BIT_EXE: layout map: 0x01 0x10
+OBJC2_32BIT_EXE: name 0x52db DetailViewController
+OBJC2_32BIT_EXE: baseMethods 0x6888 (struct method_list_t *)
+OBJC2_32BIT_EXE: entsize 12
+OBJC2_32BIT_EXE: count 8
+OBJC2_32BIT_EXE: name 0x5061 setDetailItem:
+OBJC2_32BIT_EXE: types 0x5608 v12@0:4@8
+OBJC2_32BIT_EXE: imp 0x3c70
+OBJC2_32BIT_EXE: name 0x5215 configureView
+OBJC2_32BIT_EXE: types 0x5d44 v8@0:4
+OBJC2_32BIT_EXE: imp 0x3d20
+OBJC2_32BIT_EXE: name 0x4ea2 viewDidLoad
+OBJC2_32BIT_EXE: types 0x5d44 v8@0:4
+OBJC2_32BIT_EXE: imp 0x3e20
+OBJC2_32BIT_EXE: name 0x4f43 didReceiveMemoryWarning
+OBJC2_32BIT_EXE: types 0x5d44 v8@0:4
+OBJC2_32BIT_EXE: imp 0x3e80
+OBJC2_32BIT_EXE: name 0x4e21 .cxx_destruct
+OBJC2_32BIT_EXE: types 0x5d44 v8@0:4
+OBJC2_32BIT_EXE: imp 0x3f90
+OBJC2_32BIT_EXE: name 0x41a0 detailItem
+OBJC2_32BIT_EXE: types 0x5c80 @8@0:4
+OBJC2_32BIT_EXE: imp 0x3ed0
+OBJC2_32BIT_EXE: name 0x5223 detailDescriptionLabel
+OBJC2_32BIT_EXE: types 0x5c80 @8@0:4
+OBJC2_32BIT_EXE: imp 0x3f00
+OBJC2_32BIT_EXE: name 0x523a setDetailDescriptionLabel:
+OBJC2_32BIT_EXE: types 0x5608 v12@0:4@8
+OBJC2_32BIT_EXE: imp 0x3f40
+OBJC2_32BIT_EXE: baseProtocols 0x0
+OBJC2_32BIT_EXE: ivars 0x68f0
+OBJC2_32BIT_EXE: entsize 20
+OBJC2_32BIT_EXE: count 2
+OBJC2_32BIT_EXE: offset 0x6a68 4
+OBJC2_32BIT_EXE: name 0x5255 _detailItem
+OBJC2_32BIT_EXE: type 0x5d9e @
+OBJC2_32BIT_EXE: alignment 2
+OBJC2_32BIT_EXE: size 4
+OBJC2_32BIT_EXE: offset 0x6a6c 8
+OBJC2_32BIT_EXE: name 0x5261 _detailDescriptionLabel
+OBJC2_32BIT_EXE: type 0x5da0 @"UILabel"
+OBJC2_32BIT_EXE: alignment 2
+OBJC2_32BIT_EXE: size 4
+OBJC2_32BIT_EXE: weakIvarLayout 0x52f3
+OBJC2_32BIT_EXE: layout map: 0x11
+OBJC2_32BIT_EXE: baseProperties 0x6920
+OBJC2_32BIT_EXE: entsize 8
+OBJC2_32BIT_EXE: count 2
+OBJC2_32BIT_EXE: name 0x5ea5 detailItem
+OBJC2_32BIT_EXE: attributes 0x5eb0 T@,&,N,V_detailItem
+OBJC2_32BIT_EXE: name 0x5ec4 detailDescriptionLabel
+OBJC2_32BIT_EXE: attributes 0x5edb T@"UILabel",W,N,V_detailDescriptionLabel
+OBJC2_32BIT_EXE: Meta Class
+OBJC2_32BIT_EXE: isa 0x0
+OBJC2_32BIT_EXE: superclass 0x0
+OBJC2_32BIT_EXE: cache 0x0
+OBJC2_32BIT_EXE: vtable 0x0
+OBJC2_32BIT_EXE: data 0x6860 (struct class_ro_t *)
+OBJC2_32BIT_EXE: flags 0x185 RO_META RO_HAS_CXX_STRUCTORS
+OBJC2_32BIT_EXE: instanceStart 20
+OBJC2_32BIT_EXE: instanceSize 20
+OBJC2_32BIT_EXE: ivarLayout 0x0
+OBJC2_32BIT_EXE: name 0x52db DetailViewController
+OBJC2_32BIT_EXE: baseMethods 0x0 (struct method_list_t *)
+OBJC2_32BIT_EXE: baseProtocols 0x0
+OBJC2_32BIT_EXE: ivars 0x0
+OBJC2_32BIT_EXE: weakIvarLayout 0x0
+OBJC2_32BIT_EXE: baseProperties 0x0
+OBJC2_32BIT_EXE: Contents of (__DATA,__objc_classrefs) section
+OBJC2_32BIT_EXE: 00006a30 0x0
+OBJC2_32BIT_EXE: 00006a34 0x6ac0
+OBJC2_32BIT_EXE: 00006a38 0x0
+OBJC2_32BIT_EXE: 00006a3c 0x0
+OBJC2_32BIT_EXE: 00006a40 0x0
+OBJC2_32BIT_EXE: 00006a44 0x0
+OBJC2_32BIT_EXE: 00006a48 0x0
+OBJC2_32BIT_EXE: 00006a4c 0x0
+OBJC2_32BIT_EXE: 00006a50 0x6a84
+OBJC2_32BIT_EXE: Contents of (__DATA,__objc_superrefs) section
+OBJC2_32BIT_EXE: 00006a54 0x6a98
+OBJC2_32BIT_EXE: 00006a58 0x6ac0
+OBJC2_32BIT_EXE: Contents of (__DATA,__objc_protolist) section
+OBJC2_32BIT_EXE: 00006074 0x6ae8
+OBJC2_32BIT_EXE: 00006078 0x6b14
+OBJC2_32BIT_EXE: 0000607c 0x6b40
+OBJC2_32BIT_EXE: Contents of (__DATA,__objc_imageinfo) section
+OBJC2_32BIT_EXE: version 0
+OBJC2_32BIT_EXE: flags 0x20
+
+OBJC2_32BIT_OBJ: Objective-C segment
+OBJC2_32BIT_OBJ: Contents of (__DATA,__objc_classlist) section
+OBJC2_32BIT_OBJ: 00003ae4 0x3914 _OBJC_CLASS_$_DetailViewController
+OBJC2_32BIT_OBJ: isa 0x3928 _OBJC_METACLASS_$_DetailViewController
+OBJC2_32BIT_OBJ: superclass 0x0 _OBJC_CLASS_$_UIViewController
+OBJC2_32BIT_OBJ: cache 0x0 __objc_empty_cache
+OBJC2_32BIT_OBJ: vtable 0x0 -[DetailViewController setDetailItem:]
+OBJC2_32BIT_OBJ: data 0x3a38 (struct class_ro_t *)
+OBJC2_32BIT_OBJ: flags 0x184 RO_HAS_CXX_STRUCTORS
+OBJC2_32BIT_OBJ: instanceStart 4
+OBJC2_32BIT_OBJ: instanceSize 12
+OBJC2_32BIT_OBJ: ivarLayout 0x3955
+OBJC2_32BIT_OBJ: layout map: 0x01 0x10
+OBJC2_32BIT_OBJ: name 0x3940 DetailViewController
+OBJC2_32BIT_OBJ: baseMethods 0x3988 (struct method_list_t *)
+OBJC2_32BIT_OBJ: entsize 12
+OBJC2_32BIT_OBJ: count 8
+OBJC2_32BIT_OBJ: name 0x3899 setDetailItem:
+OBJC2_32BIT_OBJ: types 0x3a60 v12@0:4@8
+OBJC2_32BIT_OBJ: imp 0x0 -[DetailViewController setDetailItem:]
+OBJC2_32BIT_OBJ: name 0x3830 configureView
+OBJC2_32BIT_OBJ: types 0x3a6a v8@0:4
+OBJC2_32BIT_OBJ: imp 0xb0 -[DetailViewController configureView]
+OBJC2_32BIT_OBJ: name 0x3875 viewDidLoad
+OBJC2_32BIT_OBJ: types 0x3a6a v8@0:4
+OBJC2_32BIT_OBJ: imp 0x1b0 -[DetailViewController viewDidLoad]
+OBJC2_32BIT_OBJ: name 0x3881 didReceiveMemoryWarning
+OBJC2_32BIT_OBJ: types 0x3a6a v8@0:4
+OBJC2_32BIT_OBJ: imp 0x210 -[DetailViewController didReceiveMemoryWarning]
+OBJC2_32BIT_OBJ: name 0x38a8 .cxx_destruct
+OBJC2_32BIT_OBJ: types 0x3a6a v8@0:4
+OBJC2_32BIT_OBJ: imp 0x320 -[DetailViewController .cxx_destruct]
+OBJC2_32BIT_OBJ: name 0x383e detailItem
+OBJC2_32BIT_OBJ: types 0x3a71 @8@0:4
+OBJC2_32BIT_OBJ: imp 0x260 -[DetailViewController detailItem]
+OBJC2_32BIT_OBJ: name 0x3849 detailDescriptionLabel
+OBJC2_32BIT_OBJ: types 0x3a71 @8@0:4
+OBJC2_32BIT_OBJ: imp 0x290 -[DetailViewController detailDescriptionLabel]
+OBJC2_32BIT_OBJ: name 0x38b6 setDetailDescriptionLabel:
+OBJC2_32BIT_OBJ: types 0x3a60 v12@0:4@8
+OBJC2_32BIT_OBJ: imp 0x2d0 -[DetailViewController setDetailDescriptionLabel:]
+OBJC2_32BIT_OBJ: baseProtocols 0x0
+OBJC2_32BIT_OBJ: ivars 0x39f0
+OBJC2_32BIT_OBJ: entsize 20
+OBJC2_32BIT_OBJ: count 2
+OBJC2_32BIT_OBJ: offset 0x3828 4
+OBJC2_32BIT_OBJ: name 0x38d1 _detailItem
+OBJC2_32BIT_OBJ: type 0x3a78 @
+OBJC2_32BIT_OBJ: alignment 2
+OBJC2_32BIT_OBJ: size 4
+OBJC2_32BIT_OBJ: offset 0x382c 8
+OBJC2_32BIT_OBJ: name 0x38dd _detailDescriptionLabel
+OBJC2_32BIT_OBJ: type 0x3a7a @"UILabel"
+OBJC2_32BIT_OBJ: alignment 2
+OBJC2_32BIT_OBJ: size 4
+OBJC2_32BIT_OBJ: weakIvarLayout 0x3958
+OBJC2_32BIT_OBJ: layout map: 0x11
+OBJC2_32BIT_OBJ: baseProperties 0x3a20
+OBJC2_32BIT_OBJ: entsize 8
+OBJC2_32BIT_OBJ: count 2
+OBJC2_32BIT_OBJ: name 0x3a85 detailItem
+OBJC2_32BIT_OBJ: attributes 0x3a90 T@,&,N,V_detailItem
+OBJC2_32BIT_OBJ: name 0x3aa4 detailDescriptionLabel
+OBJC2_32BIT_OBJ: attributes 0x3abb T@"UILabel",W,N,V_detailDescriptionLabel
+OBJC2_32BIT_OBJ: Meta Class
+OBJC2_32BIT_OBJ: isa 0x0 _OBJC_METACLASS_$_NSObject
+OBJC2_32BIT_OBJ: superclass 0x0 _OBJC_METACLASS_$_UIViewController
+OBJC2_32BIT_OBJ: cache 0x0 __objc_empty_cache
+OBJC2_32BIT_OBJ: vtable 0x0 -[DetailViewController setDetailItem:]
+OBJC2_32BIT_OBJ: data 0x3960 (struct class_ro_t *)
+OBJC2_32BIT_OBJ: flags 0x185 RO_META RO_HAS_CXX_STRUCTORS
+OBJC2_32BIT_OBJ: instanceStart 20
+OBJC2_32BIT_OBJ: instanceSize 20
+OBJC2_32BIT_OBJ: ivarLayout 0x0
+OBJC2_32BIT_OBJ: name 0x3940 DetailViewController
+OBJC2_32BIT_OBJ: baseMethods 0x0 (struct method_list_t *)
+OBJC2_32BIT_OBJ: baseProtocols 0x0
+OBJC2_32BIT_OBJ: ivars 0x0
+OBJC2_32BIT_OBJ: weakIvarLayout 0x0
+OBJC2_32BIT_OBJ: baseProperties 0x0
+OBJC2_32BIT_OBJ: Contents of (__DATA,__objc_superrefs) section
+OBJC2_32BIT_OBJ: 0000393c 0x3914 _OBJC_CLASS_$_DetailViewController
+OBJC2_32BIT_OBJ: Contents of (__DATA,__objc_imageinfo) section
+OBJC2_32BIT_OBJ: version 0
+OBJC2_32BIT_OBJ: flags 0x20
+
+OBJC1_32BIT_EXE: Objective-C segment
+OBJC1_32BIT_EXE: Module 0x4128
+OBJC1_32BIT_EXE: version 7
+OBJC1_32BIT_EXE: size 16
+OBJC1_32BIT_EXE: name
+OBJC1_32BIT_EXE: symtab 0x00004108
+OBJC1_32BIT_EXE: sel_ref_cnt 0
+OBJC1_32BIT_EXE: refs 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: cls_def_cnt 1
+OBJC1_32BIT_EXE: cat_def_cnt 0
+OBJC1_32BIT_EXE: Class Definitions
+OBJC1_32BIT_EXE: defs[0] 0x00004000
+OBJC1_32BIT_EXE: isa 0x00004068
+OBJC1_32BIT_EXE: super_class 0x000025b8 NSViewController
+OBJC1_32BIT_EXE: name 0x000025c9 ViewController
+OBJC1_32BIT_EXE: version 0x00000000
+OBJC1_32BIT_EXE: info 0x00000001 CLS_CLASS
+OBJC1_32BIT_EXE: instance_size 0x00000034
+OBJC1_32BIT_EXE: ivars 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: methods 0x000040c8
+OBJC1_32BIT_EXE: obsolete 0x00000000
+OBJC1_32BIT_EXE: method_count 2
+OBJC1_32BIT_EXE: method_name 0x0000257c viewDidLoad
+OBJC1_32BIT_EXE: method_types 0x0000259e v8@0:4
+OBJC1_32BIT_EXE: method_imp 0x00002430
+OBJC1_32BIT_EXE: method_name 0x00002588 setRepresentedObject:
+OBJC1_32BIT_EXE: method_types 0x000025a5 v12@0:4@8
+OBJC1_32BIT_EXE: method_imp 0x00002480
+OBJC1_32BIT_EXE: cache 0x00000000
+OBJC1_32BIT_EXE: protocols 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: Meta Class
+OBJC1_32BIT_EXE: isa 0x000025af NSObject
+OBJC1_32BIT_EXE: super_class 0x000025b8 NSViewController
+OBJC1_32BIT_EXE: name 0x000025c9 ViewController
+OBJC1_32BIT_EXE: version 0x00000000
+OBJC1_32BIT_EXE: info 0x00000002 CLS_META
+OBJC1_32BIT_EXE: instance_size 0x00000030
+OBJC1_32BIT_EXE: ivars 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: methods 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: cache 0x00000000
+OBJC1_32BIT_EXE: protocols 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: Module 0x4138
+OBJC1_32BIT_EXE: version 7
+OBJC1_32BIT_EXE: size 16
+OBJC1_32BIT_EXE: name
+OBJC1_32BIT_EXE: symtab 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: Module 0x4148
+OBJC1_32BIT_EXE: version 7
+OBJC1_32BIT_EXE: size 16
+OBJC1_32BIT_EXE: name
+OBJC1_32BIT_EXE: symtab 0x00004118
+OBJC1_32BIT_EXE: sel_ref_cnt 0
+OBJC1_32BIT_EXE: refs 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: cls_def_cnt 1
+OBJC1_32BIT_EXE: cat_def_cnt 0
+OBJC1_32BIT_EXE: Class Definitions
+OBJC1_32BIT_EXE: defs[0] 0x00004030
+OBJC1_32BIT_EXE: isa 0x00004098
+OBJC1_32BIT_EXE: super_class 0x000025af NSObject
+OBJC1_32BIT_EXE: name 0x00002faa AppDelegate
+OBJC1_32BIT_EXE: version 0x00000000
+OBJC1_32BIT_EXE: info 0x00000001 CLS_CLASS
+OBJC1_32BIT_EXE: instance_size 0x00000004
+OBJC1_32BIT_EXE: ivars 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: methods 0x000040e8
+OBJC1_32BIT_EXE: obsolete 0x00000000
+OBJC1_32BIT_EXE: method_count 2
+OBJC1_32BIT_EXE: method_name 0x00002c5b applicationDidFinishLaunching:
+OBJC1_32BIT_EXE: method_types 0x000025a5 v12@0:4@8
+OBJC1_32BIT_EXE: method_imp 0x00002510
+OBJC1_32BIT_EXE: method_name 0x00002d6f applicationWillTerminate:
+OBJC1_32BIT_EXE: method_types 0x000025a5 v12@0:4@8
+OBJC1_32BIT_EXE: method_imp 0x00002530
+OBJC1_32BIT_EXE: cache 0x00000000
+OBJC1_32BIT_EXE: protocols 0x000043b4
+OBJC1_32BIT_EXE: next 0x00000000
+OBJC1_32BIT_EXE: count 1
+OBJC1_32BIT_EXE: list[0] 0x00004390
+OBJC1_32BIT_EXE: isa 0x000030b0
+OBJC1_32BIT_EXE: protocol_name 0x00002dd3 NSApplicationDelegate
+OBJC1_32BIT_EXE: protocol_list 0x000043a4
+OBJC1_32BIT_EXE: next 0x00000000
+OBJC1_32BIT_EXE: count 1
+OBJC1_32BIT_EXE: list[0] 0x0000437c
+OBJC1_32BIT_EXE: isa 0x00003120
+OBJC1_32BIT_EXE: protocol_name 0x000025af NSObject
+OBJC1_32BIT_EXE: protocol_list 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: instance_methods 0x00004290
+OBJC1_32BIT_EXE: count 19
+OBJC1_32BIT_EXE: list[0]
+OBJC1_32BIT_EXE: name 0x00002de9 isEqual:
+OBJC1_32BIT_EXE: types 0x000026e7 c12@0:4@8
+OBJC1_32BIT_EXE: list[1]
+OBJC1_32BIT_EXE: name 0x00002df2 class
+OBJC1_32BIT_EXE: types 0x00002df8 #8@0:4
+OBJC1_32BIT_EXE: list[2]
+OBJC1_32BIT_EXE: name 0x00002dff self
+OBJC1_32BIT_EXE: types 0x00002e04 @8@0:4
+OBJC1_32BIT_EXE: list[3]
+OBJC1_32BIT_EXE: name 0x00002e0b performSelector:
+OBJC1_32BIT_EXE: types 0x00002e1c @12@0:4:8
+OBJC1_32BIT_EXE: list[4]
+OBJC1_32BIT_EXE: name 0x00002e26 performSelector:withObject:
+OBJC1_32BIT_EXE: types 0x00002e42 @16@0:4:8@12
+OBJC1_32BIT_EXE: list[5]
+OBJC1_32BIT_EXE: name 0x00002e4f performSelector:withObject:withObject:
+OBJC1_32BIT_EXE: types 0x00002e76 @20@0:4:8@12@16
+OBJC1_32BIT_EXE: list[6]
+OBJC1_32BIT_EXE: name 0x00002e86 isProxy
+OBJC1_32BIT_EXE: types 0x00002e8e c8@0:4
+OBJC1_32BIT_EXE: list[7]
+OBJC1_32BIT_EXE: name 0x00002e95 isKindOfClass:
+OBJC1_32BIT_EXE: types 0x00002ea4 c12@0:4#8
+OBJC1_32BIT_EXE: list[8]
+OBJC1_32BIT_EXE: name 0x00002eae isMemberOfClass:
+OBJC1_32BIT_EXE: types 0x00002ea4 c12@0:4#8
+OBJC1_32BIT_EXE: list[9]
+OBJC1_32BIT_EXE: name 0x00002ebf conformsToProtocol:
+OBJC1_32BIT_EXE: types 0x000026e7 c12@0:4@8
+OBJC1_32BIT_EXE: list[10]
+OBJC1_32BIT_EXE: name 0x00002ee7 respondsToSelector:
+OBJC1_32BIT_EXE: types 0x00002efb c12@0:4:8
+OBJC1_32BIT_EXE: list[11]
+OBJC1_32BIT_EXE: name 0x00002f05 retain
+OBJC1_32BIT_EXE: types 0x00002e04 @8@0:4
+OBJC1_32BIT_EXE: list[12]
+OBJC1_32BIT_EXE: name 0x00002f0c release
+OBJC1_32BIT_EXE: types 0x00002f14 Vv8@0:4
+OBJC1_32BIT_EXE: list[13]
+OBJC1_32BIT_EXE: name 0x00002f1c autorelease
+OBJC1_32BIT_EXE: types 0x00002e04 @8@0:4
+OBJC1_32BIT_EXE: list[14]
+OBJC1_32BIT_EXE: name 0x00002f28 retainCount
+OBJC1_32BIT_EXE: types 0x00002f34 I8@0:4
+OBJC1_32BIT_EXE: list[15]
+OBJC1_32BIT_EXE: name 0x00002f3b zone
+OBJC1_32BIT_EXE: types 0x00002f40 ^{_NSZone=}8@0:4
+OBJC1_32BIT_EXE: list[16]
+OBJC1_32BIT_EXE: name 0x00002f51 hash
+OBJC1_32BIT_EXE: types 0x00002f34 I8@0:4
+OBJC1_32BIT_EXE: list[17]
+OBJC1_32BIT_EXE: name 0x00002f56 superclass
+OBJC1_32BIT_EXE: types 0x00002df8 #8@0:4
+OBJC1_32BIT_EXE: list[18]
+OBJC1_32BIT_EXE: name 0x00002f61 description
+OBJC1_32BIT_EXE: types 0x00002e04 @8@0:4
+OBJC1_32BIT_EXE: class_methods 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: instance_methods 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: class_methods 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: Meta Class
+OBJC1_32BIT_EXE: isa 0x000025af NSObject
+OBJC1_32BIT_EXE: super_class 0x000025af NSObject
+OBJC1_32BIT_EXE: name 0x00002faa AppDelegate
+OBJC1_32BIT_EXE: version 0x00000000
+OBJC1_32BIT_EXE: info 0x00000002 CLS_META
+OBJC1_32BIT_EXE: instance_size 0x00000030
+OBJC1_32BIT_EXE: ivars 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: methods 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: cache 0x00000000
+OBJC1_32BIT_EXE: protocols 0x000043b4
+OBJC1_32BIT_EXE: next 0x00000000
+OBJC1_32BIT_EXE: count 1
+OBJC1_32BIT_EXE: list[0] 0x00004390
+OBJC1_32BIT_EXE: isa 0x000030b0
+OBJC1_32BIT_EXE: protocol_name 0x00002dd3 NSApplicationDelegate
+OBJC1_32BIT_EXE: protocol_list 0x000043a4
+OBJC1_32BIT_EXE: next 0x00000000
+OBJC1_32BIT_EXE: count 1
+OBJC1_32BIT_EXE: list[0] 0x0000437c
+OBJC1_32BIT_EXE: isa 0x00003120
+OBJC1_32BIT_EXE: protocol_name 0x000025af NSObject
+OBJC1_32BIT_EXE: protocol_list 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: instance_methods 0x00004290
+OBJC1_32BIT_EXE: count 19
+OBJC1_32BIT_EXE: list[0]
+OBJC1_32BIT_EXE: name 0x00002de9 isEqual:
+OBJC1_32BIT_EXE: types 0x000026e7 c12@0:4@8
+OBJC1_32BIT_EXE: list[1]
+OBJC1_32BIT_EXE: name 0x00002df2 class
+OBJC1_32BIT_EXE: types 0x00002df8 #8@0:4
+OBJC1_32BIT_EXE: list[2]
+OBJC1_32BIT_EXE: name 0x00002dff self
+OBJC1_32BIT_EXE: types 0x00002e04 @8@0:4
+OBJC1_32BIT_EXE: list[3]
+OBJC1_32BIT_EXE: name 0x00002e0b performSelector:
+OBJC1_32BIT_EXE: types 0x00002e1c @12@0:4:8
+OBJC1_32BIT_EXE: list[4]
+OBJC1_32BIT_EXE: name 0x00002e26 performSelector:withObject:
+OBJC1_32BIT_EXE: types 0x00002e42 @16@0:4:8@12
+OBJC1_32BIT_EXE: list[5]
+OBJC1_32BIT_EXE: name 0x00002e4f performSelector:withObject:withObject:
+OBJC1_32BIT_EXE: types 0x00002e76 @20@0:4:8@12@16
+OBJC1_32BIT_EXE: list[6]
+OBJC1_32BIT_EXE: name 0x00002e86 isProxy
+OBJC1_32BIT_EXE: types 0x00002e8e c8@0:4
+OBJC1_32BIT_EXE: list[7]
+OBJC1_32BIT_EXE: name 0x00002e95 isKindOfClass:
+OBJC1_32BIT_EXE: types 0x00002ea4 c12@0:4#8
+OBJC1_32BIT_EXE: list[8]
+OBJC1_32BIT_EXE: name 0x00002eae isMemberOfClass:
+OBJC1_32BIT_EXE: types 0x00002ea4 c12@0:4#8
+OBJC1_32BIT_EXE: list[9]
+OBJC1_32BIT_EXE: name 0x00002ebf conformsToProtocol:
+OBJC1_32BIT_EXE: types 0x000026e7 c12@0:4@8
+OBJC1_32BIT_EXE: list[10]
+OBJC1_32BIT_EXE: name 0x00002ee7 respondsToSelector:
+OBJC1_32BIT_EXE: types 0x00002efb c12@0:4:8
+OBJC1_32BIT_EXE: list[11]
+OBJC1_32BIT_EXE: name 0x00002f05 retain
+OBJC1_32BIT_EXE: types 0x00002e04 @8@0:4
+OBJC1_32BIT_EXE: list[12]
+OBJC1_32BIT_EXE: name 0x00002f0c release
+OBJC1_32BIT_EXE: types 0x00002f14 Vv8@0:4
+OBJC1_32BIT_EXE: list[13]
+OBJC1_32BIT_EXE: name 0x00002f1c autorelease
+OBJC1_32BIT_EXE: types 0x00002e04 @8@0:4
+OBJC1_32BIT_EXE: list[14]
+OBJC1_32BIT_EXE: name 0x00002f28 retainCount
+OBJC1_32BIT_EXE: types 0x00002f34 I8@0:4
+OBJC1_32BIT_EXE: list[15]
+OBJC1_32BIT_EXE: name 0x00002f3b zone
+OBJC1_32BIT_EXE: types 0x00002f40 ^{_NSZone=}8@0:4
+OBJC1_32BIT_EXE: list[16]
+OBJC1_32BIT_EXE: name 0x00002f51 hash
+OBJC1_32BIT_EXE: types 0x00002f34 I8@0:4
+OBJC1_32BIT_EXE: list[17]
+OBJC1_32BIT_EXE: name 0x00002f56 superclass
+OBJC1_32BIT_EXE: types 0x00002df8 #8@0:4
+OBJC1_32BIT_EXE: list[18]
+OBJC1_32BIT_EXE: name 0x00002f61 description
+OBJC1_32BIT_EXE: types 0x00002e04 @8@0:4
+OBJC1_32BIT_EXE: class_methods 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: instance_methods 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: class_methods 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_EXE: Contents of (__OBJC,__image_info) section
+OBJC1_32BIT_EXE: version 0
+OBJC1_32BIT_EXE: flags 0x0 RR
+
+OBJC1_32BIT_OBJ: Objective-C segment
+OBJC1_32BIT_OBJ: Module 0xb344
+OBJC1_32BIT_OBJ: version 7
+OBJC1_32BIT_OBJ: size 16
+OBJC1_32BIT_OBJ: name
+OBJC1_32BIT_OBJ: symtab 0x0000b334
+OBJC1_32BIT_OBJ: sel_ref_cnt 0
+OBJC1_32BIT_OBJ: refs 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_OBJ: cls_def_cnt 1
+OBJC1_32BIT_OBJ: cat_def_cnt 0
+OBJC1_32BIT_OBJ: Class Definitions
+OBJC1_32BIT_OBJ: defs[0] 0x0000b24c
+OBJC1_32BIT_OBJ: isa 0x0000b2e4
+OBJC1_32BIT_OBJ: super_class 0x0000b2b8 NSViewController
+OBJC1_32BIT_OBJ: name 0x0000b2c9 ViewController
+OBJC1_32BIT_OBJ: version 0x00000000
+OBJC1_32BIT_OBJ: info 0x00000001 CLS_CLASS
+OBJC1_32BIT_OBJ: instance_size 0x00000034
+OBJC1_32BIT_OBJ: ivars 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_OBJ: methods 0x0000b314
+OBJC1_32BIT_OBJ: obsolete 0x00000000
+OBJC1_32BIT_OBJ: method_count 2
+OBJC1_32BIT_OBJ: method_name 0x0000b27c viewDidLoad
+OBJC1_32BIT_OBJ: method_types 0x0000b29e v8@0:4
+OBJC1_32BIT_OBJ: method_imp 0x00000000 -[ViewController viewDidLoad]
+OBJC1_32BIT_OBJ: method_name 0x0000b288 setRepresentedObject:
+OBJC1_32BIT_OBJ: method_types 0x0000b2a5 v12@0:4@8
+OBJC1_32BIT_OBJ: method_imp 0x00000050 -[ViewController setRepresentedObject:]
+OBJC1_32BIT_OBJ: cache 0x00000000
+OBJC1_32BIT_OBJ: protocols 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_OBJ: Meta Class
+OBJC1_32BIT_OBJ: isa 0x0000b2af NSObject
+OBJC1_32BIT_OBJ: super_class 0x0000b2b8 NSViewController
+OBJC1_32BIT_OBJ: name 0x0000b2c9 ViewController
+OBJC1_32BIT_OBJ: version 0x00000000
+OBJC1_32BIT_OBJ: info 0x00000002 CLS_META
+OBJC1_32BIT_OBJ: instance_size 0x00000030
+OBJC1_32BIT_OBJ: ivars 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_OBJ: methods 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_OBJ: cache 0x00000000
+OBJC1_32BIT_OBJ: protocols 0x00000000 (not in an __OBJC section)
+OBJC1_32BIT_OBJ: Contents of (__OBJC,__image_info) section
+OBJC1_32BIT_OBJ: version 0
+OBJC1_32BIT_OBJ: flags 0x0 RR
+
+PROTOCOL: Contents of (__OBJC,__protocol) section
+PROTOCOL: Protocol 0x437c
+PROTOCOL: isa 0x00003120
+PROTOCOL: protocol_name 0x000025af NSObject
+PROTOCOL: protocol_list 0x00000000 (not in an __OBJC section)
+PROTOCOL: instance_methods 0x00004290
+PROTOCOL: count 19
+PROTOCOL: list[0]
+PROTOCOL: name 0x00002de9 isEqual:
+PROTOCOL: types 0x000026e7 c12@0:4@8
+PROTOCOL: list[1]
+PROTOCOL: name 0x00002df2 class
+PROTOCOL: types 0x00002df8 #8@0:4
+PROTOCOL: list[2]
+PROTOCOL: name 0x00002dff self
+PROTOCOL: types 0x00002e04 @8@0:4
+PROTOCOL: list[3]
+PROTOCOL: name 0x00002e0b performSelector:
+PROTOCOL: types 0x00002e1c @12@0:4:8
+PROTOCOL: list[4]
+PROTOCOL: name 0x00002e26 performSelector:withObject:
+PROTOCOL: types 0x00002e42 @16@0:4:8@12
+PROTOCOL: list[5]
+PROTOCOL: name 0x00002e4f performSelector:withObject:withObject:
+PROTOCOL: types 0x00002e76 @20@0:4:8@12@16
+PROTOCOL: list[6]
+PROTOCOL: name 0x00002e86 isProxy
+PROTOCOL: types 0x00002e8e c8@0:4
+PROTOCOL: list[7]
+PROTOCOL: name 0x00002e95 isKindOfClass:
+PROTOCOL: types 0x00002ea4 c12@0:4#8
+PROTOCOL: list[8]
+PROTOCOL: name 0x00002eae isMemberOfClass:
+PROTOCOL: types 0x00002ea4 c12@0:4#8
+PROTOCOL: list[9]
+PROTOCOL: name 0x00002ebf conformsToProtocol:
+PROTOCOL: types 0x000026e7 c12@0:4@8
+PROTOCOL: list[10]
+PROTOCOL: name 0x00002ee7 respondsToSelector:
+PROTOCOL: types 0x00002efb c12@0:4:8
+PROTOCOL: list[11]
+PROTOCOL: name 0x00002f05 retain
+PROTOCOL: types 0x00002e04 @8@0:4
+PROTOCOL: list[12]
+PROTOCOL: name 0x00002f0c release
+PROTOCOL: types 0x00002f14 Vv8@0:4
+PROTOCOL: list[13]
+PROTOCOL: name 0x00002f1c autorelease
+PROTOCOL: types 0x00002e04 @8@0:4
+PROTOCOL: list[14]
+PROTOCOL: name 0x00002f28 retainCount
+PROTOCOL: types 0x00002f34 I8@0:4
+PROTOCOL: list[15]
+PROTOCOL: name 0x00002f3b zone
+PROTOCOL: types 0x00002f40 ^{_NSZone=}8@0:4
+PROTOCOL: list[16]
+PROTOCOL: name 0x00002f51 hash
+PROTOCOL: types 0x00002f34 I8@0:4
+PROTOCOL: list[17]
+PROTOCOL: name 0x00002f56 superclass
+PROTOCOL: types 0x00002df8 #8@0:4
+PROTOCOL: list[18]
+PROTOCOL: name 0x00002f61 description
+PROTOCOL: types 0x00002e04 @8@0:4
+PROTOCOL: class_methods 0x00000000 (not in an __OBJC section)
+PROTOCOL: Protocol 0x4390
+PROTOCOL: isa 0x000030b0
+PROTOCOL: protocol_name 0x00002dd3 NSApplicationDelegate
+PROTOCOL: protocol_list 0x000043a4
+PROTOCOL: next 0x00000000
+PROTOCOL: count 1
+PROTOCOL: list[0] 0x0000437c
+PROTOCOL: isa 0x00003120
+PROTOCOL: protocol_name 0x000025af NSObject
+PROTOCOL: protocol_list 0x00000000 (not in an __OBJC section)
+PROTOCOL: instance_methods 0x00004290
+PROTOCOL: count 19
+PROTOCOL: list[0]
+PROTOCOL: name 0x00002de9 isEqual:
+PROTOCOL: types 0x000026e7 c12@0:4@8
+PROTOCOL: list[1]
+PROTOCOL: name 0x00002df2 class
+PROTOCOL: types 0x00002df8 #8@0:4
+PROTOCOL: list[2]
+PROTOCOL: name 0x00002dff self
+PROTOCOL: types 0x00002e04 @8@0:4
+PROTOCOL: list[3]
+PROTOCOL: name 0x00002e0b performSelector:
+PROTOCOL: types 0x00002e1c @12@0:4:8
+PROTOCOL: list[4]
+PROTOCOL: name 0x00002e26 performSelector:withObject:
+PROTOCOL: types 0x00002e42 @16@0:4:8@12
+PROTOCOL: list[5]
+PROTOCOL: name 0x00002e4f performSelector:withObject:withObject:
+PROTOCOL: types 0x00002e76 @20@0:4:8@12@16
+PROTOCOL: list[6]
+PROTOCOL: name 0x00002e86 isProxy
+PROTOCOL: types 0x00002e8e c8@0:4
+PROTOCOL: list[7]
+PROTOCOL: name 0x00002e95 isKindOfClass:
+PROTOCOL: types 0x00002ea4 c12@0:4#8
+PROTOCOL: list[8]
+PROTOCOL: name 0x00002eae isMemberOfClass:
+PROTOCOL: types 0x00002ea4 c12@0:4#8
+PROTOCOL: list[9]
+PROTOCOL: name 0x00002ebf conformsToProtocol:
+PROTOCOL: types 0x000026e7 c12@0:4@8
+PROTOCOL: list[10]
+PROTOCOL: name 0x00002ee7 respondsToSelector:
+PROTOCOL: types 0x00002efb c12@0:4:8
+PROTOCOL: list[11]
+PROTOCOL: name 0x00002f05 retain
+PROTOCOL: types 0x00002e04 @8@0:4
+PROTOCOL: list[12]
+PROTOCOL: name 0x00002f0c release
+PROTOCOL: types 0x00002f14 Vv8@0:4
+PROTOCOL: list[13]
+PROTOCOL: name 0x00002f1c autorelease
+PROTOCOL: types 0x00002e04 @8@0:4
+PROTOCOL: list[14]
+PROTOCOL: name 0x00002f28 retainCount
+PROTOCOL: types 0x00002f34 I8@0:4
+PROTOCOL: list[15]
+PROTOCOL: name 0x00002f3b zone
+PROTOCOL: types 0x00002f40 ^{_NSZone=}8@0:4
+PROTOCOL: list[16]
+PROTOCOL: name 0x00002f51 hash
+PROTOCOL: types 0x00002f34 I8@0:4
+PROTOCOL: list[17]
+PROTOCOL: name 0x00002f56 superclass
+PROTOCOL: types 0x00002df8 #8@0:4
+PROTOCOL: list[18]
+PROTOCOL: name 0x00002f61 description
+PROTOCOL: types 0x00002e04 @8@0:4
+PROTOCOL: class_methods 0x00000000 (not in an __OBJC section)
+PROTOCOL: instance_methods 0x00000000 (not in an __OBJC section)
+PROTOCOL: class_methods 0x00000000 (not in an __OBJC section)
diff --git a/test/tools/llvm-objdump/macho-sections.test b/test/tools/llvm-objdump/macho-sections.test
new file mode 100644
index 0000000..31efd11
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-sections.test
@@ -0,0 +1,5 @@
+# RUN: llvm-objdump -macho -section=__data %p/Inputs/bind2.macho-x86_64 | FileCheck %s
+# RUN: llvm-objdump -macho -section=__data -raw %p/Inputs/bind2.macho-x86_64 | FileCheck --check-prefix=RAW %s
+
+# CHECK: bind2.macho-x86_64:
+# RAW-NOT: bind2.macho-x86_64:
diff --git a/test/tools/llvm-readobj/Inputs/macho-universal-archive.x86_64.i386 b/test/tools/llvm-readobj/Inputs/macho-universal-archive.x86_64.i386
new file mode 100644
index 0000000..1660714
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/macho-universal-archive.x86_64.i386
Binary files differ
diff --git a/test/tools/llvm-readobj/macho-universal-x86_64.i386.test b/test/tools/llvm-readobj/macho-universal-x86_64.i386.test
index 502e0fb..2138bb7 100644
--- a/test/tools/llvm-readobj/macho-universal-x86_64.i386.test
+++ b/test/tools/llvm-readobj/macho-universal-x86_64.i386.test
@@ -4,6 +4,12 @@ RUN: | FileCheck %s -check-prefix MULTIHEADER
RUN: llvm-readobj -sections %p/Inputs/macho-universal.x86_64.i386 \
RUN: | FileCheck %s -check-prefix MULTISECTIONS
+RUN: llvm-readobj -h %p/Inputs/macho-universal-archive.x86_64.i386 \
+RUN: | FileCheck %s -check-prefix MULTIHEADER-ARCHIVE
+
+RUN: llvm-readobj -sections %p/Inputs/macho-universal-archive.x86_64.i386 \
+RUN: | FileCheck %s -check-prefix MULTISECTIONS-ARCHIVE
+
MULTIHEADER: Format: Mach-O 64-bit x86-64
MULTIHEADER: Arch: x86_64
MULTIHEADER: AddressSize: 64bit
@@ -139,3 +145,153 @@ MULTISECTIONS: Reserved1: 0x0
MULTISECTIONS: Reserved2: 0x0
MULTISECTIONS: }
MULTISECTIONS: ]
+
+MULTIHEADER-ARCHIVE: File: hello.o
+MULTIHEADER-ARCHIVE: Format: Mach-O 64-bit x86-64
+MULTIHEADER-ARCHIVE: Arch: x86_64
+MULTIHEADER-ARCHIVE: AddressSize: 64bit
+MULTIHEADER-ARCHIVE: MachHeader {
+MULTIHEADER-ARCHIVE: Magic: Magic64 (0xFEEDFACF)
+MULTIHEADER-ARCHIVE: CpuType: X86-64 (0x1000007)
+MULTIHEADER-ARCHIVE: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+MULTIHEADER-ARCHIVE: FileType: Relocatable (0x1)
+MULTIHEADER-ARCHIVE: NumOfLoadCommands: 3
+MULTIHEADER-ARCHIVE: SizeOfLoadCommands: 496
+MULTIHEADER-ARCHIVE: Flags [ (0x2000)
+MULTIHEADER-ARCHIVE: MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
+MULTIHEADER-ARCHIVE: ]
+MULTIHEADER-ARCHIVE: Reserved: 0x0
+MULTIHEADER-ARCHIVE: }
+MULTIHEADER-ARCHIVE: File: foo.o
+MULTIHEADER-ARCHIVE: Format: Mach-O 32-bit i386
+MULTIHEADER-ARCHIVE: Arch: i386
+MULTIHEADER-ARCHIVE: AddressSize: 32bit
+MULTIHEADER-ARCHIVE: MachHeader {
+MULTIHEADER-ARCHIVE: Magic: Magic (0xFEEDFACE)
+MULTIHEADER-ARCHIVE: CpuType: X86 (0x7)
+MULTIHEADER-ARCHIVE: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+MULTIHEADER-ARCHIVE: FileType: Relocatable (0x1)
+MULTIHEADER-ARCHIVE: NumOfLoadCommands: 3
+MULTIHEADER-ARCHIVE: SizeOfLoadCommands: 296
+MULTIHEADER-ARCHIVE: Flags [ (0x2000)
+MULTIHEADER-ARCHIVE: MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
+MULTIHEADER-ARCHIVE: ]
+MULTIHEADER-ARCHIVE: }
+
+MULTISECTIONS-ARCHIVE: File: hello.o
+MULTISECTIONS-ARCHIVE: Format: Mach-O 64-bit x86-64
+MULTISECTIONS-ARCHIVE: Arch: x86_64
+MULTISECTIONS-ARCHIVE: AddressSize: 64bit
+MULTISECTIONS-ARCHIVE: Sections [
+MULTISECTIONS-ARCHIVE: Section {
+MULTISECTIONS-ARCHIVE: Index: 0
+MULTISECTIONS-ARCHIVE: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MULTISECTIONS-ARCHIVE: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MULTISECTIONS-ARCHIVE: Address: 0x0
+MULTISECTIONS-ARCHIVE: Size: 0x3B
+MULTISECTIONS-ARCHIVE: Offset: 528
+MULTISECTIONS-ARCHIVE: Alignment: 4
+MULTISECTIONS-ARCHIVE: RelocationOffset: 0x2B8
+MULTISECTIONS-ARCHIVE: RelocationCount: 2
+MULTISECTIONS-ARCHIVE: Type: 0x0
+MULTISECTIONS-ARCHIVE: Attributes [ (0x800004)
+MULTISECTIONS-ARCHIVE: PureInstructions (0x800000)
+MULTISECTIONS-ARCHIVE: SomeInstructions (0x4)
+MULTISECTIONS-ARCHIVE: ]
+MULTISECTIONS-ARCHIVE: Reserved1: 0x0
+MULTISECTIONS-ARCHIVE: Reserved2: 0x0
+MULTISECTIONS-ARCHIVE: }
+MULTISECTIONS-ARCHIVE: Section {
+MULTISECTIONS-ARCHIVE: Index: 1
+MULTISECTIONS-ARCHIVE: Name: __cstring (5F 5F 63 73 74 72 69 6E 67 00 00 00 00 00 00 00)
+MULTISECTIONS-ARCHIVE: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MULTISECTIONS-ARCHIVE: Address: 0x3B
+MULTISECTIONS-ARCHIVE: Size: 0xD
+MULTISECTIONS-ARCHIVE: Offset: 587
+MULTISECTIONS-ARCHIVE: Alignment: 0
+MULTISECTIONS-ARCHIVE: RelocationOffset: 0x0
+MULTISECTIONS-ARCHIVE: RelocationCount: 0
+MULTISECTIONS-ARCHIVE: Type: ExtReloc (0x2)
+MULTISECTIONS-ARCHIVE: Attributes [ (0x0)
+MULTISECTIONS-ARCHIVE: ]
+MULTISECTIONS-ARCHIVE: Reserved1: 0x0
+MULTISECTIONS-ARCHIVE: Reserved2: 0x0
+MULTISECTIONS-ARCHIVE: }
+MULTISECTIONS-ARCHIVE: Section {
+MULTISECTIONS-ARCHIVE: Index: 2
+MULTISECTIONS-ARCHIVE: Name: __compact_unwind (5F 5F 63 6F 6D 70 61 63 74 5F 75 6E 77 69 6E 64)
+MULTISECTIONS-ARCHIVE: Segment: __LD (5F 5F 4C 44 00 00 00 00 00 00 00 00 00 00 00 00)
+MULTISECTIONS-ARCHIVE: Address: 0x48
+MULTISECTIONS-ARCHIVE: Size: 0x20
+MULTISECTIONS-ARCHIVE: Offset: 600
+MULTISECTIONS-ARCHIVE: Alignment: 3
+MULTISECTIONS-ARCHIVE: RelocationOffset: 0x2C8
+MULTISECTIONS-ARCHIVE: RelocationCount: 1
+MULTISECTIONS-ARCHIVE: Type: 0x0
+MULTISECTIONS-ARCHIVE: Attributes [ (0x20000)
+MULTISECTIONS-ARCHIVE: Debug (0x20000)
+MULTISECTIONS-ARCHIVE: ]
+MULTISECTIONS-ARCHIVE: Reserved1: 0x0
+MULTISECTIONS-ARCHIVE: Reserved2: 0x0
+MULTISECTIONS-ARCHIVE: }
+MULTISECTIONS-ARCHIVE: Section {
+MULTISECTIONS-ARCHIVE: Index: 3
+MULTISECTIONS-ARCHIVE: Name: __eh_frame (5F 5F 65 68 5F 66 72 61 6D 65 00 00 00 00 00 00)
+MULTISECTIONS-ARCHIVE: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MULTISECTIONS-ARCHIVE: Address: 0x68
+MULTISECTIONS-ARCHIVE: Size: 0x40
+MULTISECTIONS-ARCHIVE: Offset: 632
+MULTISECTIONS-ARCHIVE: Alignment: 3
+MULTISECTIONS-ARCHIVE: RelocationOffset: 0x0
+MULTISECTIONS-ARCHIVE: RelocationCount: 0
+MULTISECTIONS-ARCHIVE: Type: 0xB
+MULTISECTIONS-ARCHIVE: Attributes [ (0x680000)
+MULTISECTIONS-ARCHIVE: LiveSupport (0x80000)
+MULTISECTIONS-ARCHIVE: NoTOC (0x400000)
+MULTISECTIONS-ARCHIVE: StripStaticSyms (0x200000)
+MULTISECTIONS-ARCHIVE: ]
+MULTISECTIONS-ARCHIVE: Reserved1: 0x0
+MULTISECTIONS-ARCHIVE: Reserved2: 0x0
+MULTISECTIONS-ARCHIVE: }
+MULTISECTIONS-ARCHIVE: ]
+MULTISECTIONS-ARCHIVE: File: foo.o
+MULTISECTIONS-ARCHIVE: Format: Mach-O 32-bit i386
+MULTISECTIONS-ARCHIVE: Arch: i386
+MULTISECTIONS-ARCHIVE: AddressSize: 32bit
+MULTISECTIONS-ARCHIVE: Sections [
+MULTISECTIONS-ARCHIVE: Section {
+MULTISECTIONS-ARCHIVE: Index: 0
+MULTISECTIONS-ARCHIVE: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MULTISECTIONS-ARCHIVE: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MULTISECTIONS-ARCHIVE: Address: 0x0
+MULTISECTIONS-ARCHIVE: Size: 0x5
+MULTISECTIONS-ARCHIVE: Offset: 324
+MULTISECTIONS-ARCHIVE: Alignment: 4
+MULTISECTIONS-ARCHIVE: RelocationOffset: 0x0
+MULTISECTIONS-ARCHIVE: RelocationCount: 0
+MULTISECTIONS-ARCHIVE: Type: 0x0
+MULTISECTIONS-ARCHIVE: Attributes [ (0x800004)
+MULTISECTIONS-ARCHIVE: PureInstructions (0x800000)
+MULTISECTIONS-ARCHIVE: SomeInstructions (0x4)
+MULTISECTIONS-ARCHIVE: ]
+MULTISECTIONS-ARCHIVE: Reserved1: 0x0
+MULTISECTIONS-ARCHIVE: Reserved2: 0x0
+MULTISECTIONS-ARCHIVE: }
+MULTISECTIONS-ARCHIVE: Section {
+MULTISECTIONS-ARCHIVE: Index: 1
+MULTISECTIONS-ARCHIVE: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MULTISECTIONS-ARCHIVE: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MULTISECTIONS-ARCHIVE: Address: 0x8
+MULTISECTIONS-ARCHIVE: Size: 0x4
+MULTISECTIONS-ARCHIVE: Offset: 332
+MULTISECTIONS-ARCHIVE: Alignment: 2
+MULTISECTIONS-ARCHIVE: RelocationOffset: 0x0
+MULTISECTIONS-ARCHIVE: RelocationCount: 0
+MULTISECTIONS-ARCHIVE: Type: 0x0
+MULTISECTIONS-ARCHIVE: Attributes [ (0x0)
+MULTISECTIONS-ARCHIVE: ]
+MULTISECTIONS-ARCHIVE: Reserved1: 0x0
+MULTISECTIONS-ARCHIVE: Reserved2: 0x0
+MULTISECTIONS-ARCHIVE: }
+MULTISECTIONS-ARCHIVE: ]
+
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index 865cb51..43f4c29 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -93,7 +93,7 @@ std::unique_ptr<Module> llvm::parseInputFile(StringRef Filename,
}
if (verifyModule(*Result, &errs())) {
- errs() << "bugpoint: " << Filename << ": error: does not verify\n";
+ errs() << "bugpoint: " << Filename << ": error: input module is broken!\n";
return std::unique_ptr<Module>();
}
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index 98061bd..53631d2 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -852,7 +852,8 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
// GetElementPtr *funcName, ulong 0, ulong 0
std::vector<Constant*> GEPargs(2,
Constant::getNullValue(Type::getInt32Ty(F->getContext())));
- Value *GEP = ConstantExpr::getGetElementPtr(funcName, GEPargs);
+ Value *GEP = ConstantExpr::getGetElementPtr(InitArray->getType(),
+ funcName, GEPargs);
std::vector<Value*> ResolverArgs;
ResolverArgs.push_back(GEP);
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index 481f343..344e7b5 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -42,6 +42,11 @@ namespace llvm {
extern cl::opt<std::string> OutputPrefix;
}
+static cl::opt<bool> PreserveBitcodeUseListOrder(
+ "preserve-bc-uselistorder",
+ cl::desc("Preserve use-list order when writing LLVM bitcode."),
+ cl::init(true), cl::Hidden);
+
namespace {
// ChildOutput - This option captures the name of the child output file that
// is set up by the parent bugpoint process
@@ -55,7 +60,7 @@ namespace {
/// file. If an error occurs, true is returned.
///
static bool writeProgramToFileAux(tool_output_file &Out, const Module *M) {
- WriteBitcodeToFile(M, Out.os());
+ WriteBitcodeToFile(M, Out.os(), PreserveBitcodeUseListOrder);
Out.os().close();
if (!Out.os().has_error()) {
Out.keep();
@@ -151,7 +156,7 @@ bool BugDriver::runPasses(Module *Program,
tool_output_file InFile(InputFilename, InputFD);
- WriteBitcodeToFile(Program, InFile.os());
+ WriteBitcodeToFile(Program, InFile.os(), PreserveBitcodeUseListOrder);
InFile.os().close();
if (InFile.os().has_error()) {
errs() << "Error writing bitcode file: " << InputFilename << "\n";
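
The hidden -preserve-bc-uselistorder option added to bugpoint here recurs below in llvm-as, llvm-extract and llvm-link, with a matching -preserve-ll-uselistorder flag for assembly output in llvm-dis, llvm-extract and llvm-link. A minimal sketch of the recurring shape, not taken from the patch itself (the helper name and error handling are invented; the WriteBitcodeToFile overload and the flag wording mirror the hunks above):

  #include "llvm/Bitcode/ReaderWriter.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/FileSystem.h"
  #include "llvm/Support/raw_ostream.h"
  #include <system_error>
  using namespace llvm;

  // Hidden flag, defaulting to true, as in the hunks above.
  static cl::opt<bool> PreserveBitcodeUseListOrder(
      "preserve-bc-uselistorder",
      cl::desc("Preserve use-list order when writing LLVM bitcode."),
      cl::init(true), cl::Hidden);

  // Write M to Path, forwarding the flag as the new third argument.
  static bool writeModuleBitcode(const Module *M, StringRef Path) {
    std::error_code EC;
    raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
    if (EC)
      return false;
    WriteBitcodeToFile(M, OS, PreserveBitcodeUseListOrder);
    OS.close();
    return !OS.has_error();
  }

Keeping the bitcode-side flag on by default preserves use-list order across bitcode round trips, while the assembly-side flag stays off unless explicitly requested.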
diff --git a/tools/bugpoint/ToolRunner.h b/tools/bugpoint/ToolRunner.h
index 454724a..5d67a94 100644
--- a/tools/bugpoint/ToolRunner.h
+++ b/tools/bugpoint/ToolRunner.h
@@ -165,7 +165,7 @@ public:
ToolArgs.clear();
if (Args) ToolArgs = *Args;
}
- ~LLC() { delete gcc; }
+ ~LLC() override { delete gcc; }
/// compileProgram - Compile the specified program from bitcode to executable
/// code. This does not produce any output, it is only used when debugging
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index 93ce3bc..724e93c 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -31,7 +31,7 @@
#include "llvm/Linker/Linker.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/IRObjectFile.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -739,7 +739,7 @@ static void saveBCFile(StringRef Path, Module &M) {
raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
if (EC)
message(LDPL_FATAL, "Failed to write the output file.");
- WriteBitcodeToFile(&M, OS);
+ WriteBitcodeToFile(&M, OS, /* ShouldPreserveUseListOrder */ true);
}
static void codegen(Module &M) {
@@ -804,9 +804,8 @@ static void codegen(Module &M) {
{
raw_fd_ostream OS(FD, true);
- formatted_raw_ostream FOS(OS);
- if (TM->addPassesToEmitFile(CodeGenPasses, FOS,
+ if (TM->addPassesToEmitFile(CodeGenPasses, OS,
TargetMachine::CGFT_ObjectFile))
message(LDPL_FATAL, "Failed to setup codegen");
CodeGenPasses.run(M);
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 55a45dc..ab50f2a 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Pass.h"
@@ -211,12 +212,6 @@ static int compileModule(char **argv, LLVMContext &Context) {
bool SkipModule = MCPU == "help" ||
(!MAttrs.empty() && MAttrs.front() == "help");
- // If user asked for the 'native' CPU, autodetect here. If autodection fails,
- // this will set the CPU to an empty string which tells the target to
- // pick a basic default.
- if (MCPU == "native")
- MCPU = sys::getHostCPUName();
-
// If user just wants to list available options, skip module loading
if (!SkipModule) {
M = parseIRFile(InputFilename, Err, Context);
@@ -225,6 +220,14 @@ static int compileModule(char **argv, LLVMContext &Context) {
return 1;
}
+ // Verify module immediately to catch problems before doInitialization() is
+ // called on any passes.
+ if (!NoVerify && verifyModule(*M, &errs())) {
+ errs() << argv[0] << ": " << InputFilename
+ << ": error: input module is broken!\n";
+ return 1;
+ }
+
// If we are supposed to override the target triple, do so now.
if (!TargetTriple.empty())
M->setTargetTriple(Triple::normalize(TargetTriple));
@@ -247,13 +250,31 @@ static int compileModule(char **argv, LLVMContext &Context) {
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
- if (MAttrs.size()) {
+ if (!MAttrs.empty() || MCPU == "native") {
SubtargetFeatures Features;
+
+ // If user asked for the 'native' CPU, we need to autodetect features.
+ // This is necessary for x86 where the CPU might not support all the
+ // features the autodetected CPU name lists in the target. For example,
+ // not all Sandybridge processors support AVX.
+ if (MCPU == "native") {
+ StringMap<bool> HostFeatures;
+ if (sys::getHostCPUFeatures(HostFeatures))
+ for (auto &F : HostFeatures)
+ Features.AddFeature(F.first(), F.second);
+ }
+
for (unsigned i = 0; i != MAttrs.size(); ++i)
Features.AddFeature(MAttrs[i]);
FeaturesStr = Features.getString();
}
+ // If user asked for the 'native' CPU, autodetect here. If autodetection fails,
+ // this will set the CPU to an empty string which tells the target to
+ // pick a basic default.
+ if (MCPU == "native")
+ MCPU = sys::getHostCPUName();
+
CodeGenOpt::Level OLvl = CodeGenOpt::Default;
switch (OptLevel) {
default:
@@ -314,7 +335,13 @@ static int compileModule(char **argv, LLVMContext &Context) {
<< ": warning: ignoring -mc-relax-all because filetype != obj";
{
- formatted_raw_ostream FOS(Out->os());
+ raw_pwrite_stream *OS = &Out->os();
+ std::unique_ptr<buffer_ostream> BOS;
+ if (FileType != TargetMachine::CGFT_AssemblyFile &&
+ !Out->os().supportsSeeking()) {
+ BOS = make_unique<buffer_ostream>(*OS);
+ OS = BOS.get();
+ }
AnalysisID StartAfterID = nullptr;
AnalysisID StopAfterID = nullptr;
@@ -337,8 +364,8 @@ static int compileModule(char **argv, LLVMContext &Context) {
}
// Ask the target to add backend passes as necessary.
- if (Target->addPassesToEmitFile(PM, FOS, FileType, NoVerify,
- StartAfterID, StopAfterID)) {
+ if (Target->addPassesToEmitFile(PM, *OS, FileType, NoVerify, StartAfterID,
+ StopAfterID)) {
errs() << argv[0] << ": target does not support generation of this"
<< " file type!\n";
return 1;
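
The llc change above reorders -mcpu=native handling: the host's features are captured into the feature string first, and only afterwards is "native" replaced by a concrete CPU name, so a CPU whose name implies features the host lacks (the Sandybridge/AVX case in the comment) still gets an accurate feature set. A condensed sketch of that flow, not from the patch (the helper is invented; the sys::getHostCPUFeatures and SubtargetFeatures calls are as used above):

  #include "llvm/ADT/StringMap.h"
  #include "llvm/MC/SubtargetFeature.h"
  #include "llvm/Support/Host.h"
  #include <string>
  #include <vector>
  using namespace llvm;

  // Build the feature string before "native" is resolved to a real CPU name.
  static std::string buildFeatures(std::string &MCPU,
                                   const std::vector<std::string> &MAttrs) {
    std::string FeaturesStr;
    if (!MAttrs.empty() || MCPU == "native") {
      SubtargetFeatures Features;
      if (MCPU == "native") {
        StringMap<bool> HostFeatures;
        if (sys::getHostCPUFeatures(HostFeatures))
          for (auto &F : HostFeatures)
            Features.AddFeature(F.first(), F.second);
      }
      // Then append any explicit -mattr flags.
      for (const std::string &A : MAttrs)
        Features.AddFeature(A);
      FeaturesStr = Features.getString();
    }
    // Only now map "native" onto the detected CPU name (it may come back
    // empty, which tells the target to pick a basic default).
    if (MCPU == "native")
      MCPU = sys::getHostCPUName();
    return FeaturesStr;
  }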
diff --git a/tools/lli/OrcLazyJIT.cpp b/tools/lli/OrcLazyJIT.cpp
index 4a8d3b9..ff8baf0 100644
--- a/tools/lli/OrcLazyJIT.cpp
+++ b/tools/lli/OrcLazyJIT.cpp
@@ -9,34 +9,130 @@
#include "OrcLazyJIT.h"
#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include <system_error>
using namespace llvm;
-std::unique_ptr<OrcLazyJIT::CompileCallbackMgr>
-OrcLazyJIT::createCallbackMgr(Triple T, LLVMContext &Context) {
+namespace {
+
+ enum class DumpKind { NoDump, DumpFuncsToStdOut, DumpModsToStdErr,
+ DumpModsToDisk };
+
+ cl::opt<DumpKind> OrcDumpKind("orc-lazy-debug",
+ cl::desc("Debug dumping for the orc-lazy JIT."),
+ cl::init(DumpKind::NoDump),
+ cl::values(
+ clEnumValN(DumpKind::NoDump, "no-dump",
+ "Don't dump anything."),
+ clEnumValN(DumpKind::DumpFuncsToStdOut,
+ "funcs-to-stdout",
+ "Dump function names to stdout."),
+ clEnumValN(DumpKind::DumpModsToStdErr,
+ "mods-to-stderr",
+ "Dump modules to stderr."),
+ clEnumValN(DumpKind::DumpModsToDisk,
+ "mods-to-disk",
+ "Dump modules to the current "
+ "working directory. (WARNING: "
+ "will overwrite existing files)."),
+ clEnumValEnd));
+}
+
+OrcLazyJIT::CallbackManagerBuilder
+OrcLazyJIT::createCallbackManagerBuilder(Triple T) {
switch (T.getArch()) {
- default:
- // Flag error.
- Error = true;
- return nullptr;
+ default: return nullptr;
case Triple::x86_64: {
- typedef orc::JITCompileCallbackManager<CompileLayerT,
+ typedef orc::JITCompileCallbackManager<IRDumpLayerT,
orc::OrcX86_64> CCMgrT;
- return make_unique<CCMgrT>(CompileLayer, Context, 0, 64);
+ return [](IRDumpLayerT &IRDumpLayer, RuntimeDyld::MemoryManager &MemMgr,
+ LLVMContext &Context) {
+ return llvm::make_unique<CCMgrT>(IRDumpLayer, MemMgr, Context, 0,
+ 64);
+ };
}
}
}
+OrcLazyJIT::TransformFtor OrcLazyJIT::createDebugDumper() {
+
+ switch (OrcDumpKind) {
+ case DumpKind::NoDump:
+ return [](std::unique_ptr<Module> M) { return std::move(M); };
+
+ case DumpKind::DumpFuncsToStdOut:
+ return [](std::unique_ptr<Module> M) {
+ printf("[ ");
+
+ for (const auto &F : *M) {
+ if (F.isDeclaration())
+ continue;
+
+ if (F.hasName()) {
+ std::string Name(F.getName());
+ printf("%s ", Name.c_str());
+ } else
+ printf("<anon> ");
+ }
+
+ printf("]\n");
+ return std::move(M);
+ };
+
+ case DumpKind::DumpModsToStdErr:
+ return [](std::unique_ptr<Module> M) {
+ dbgs() << "----- Module Start -----\n" << *M
+ << "----- Module End -----\n";
+
+ return std::move(M);
+ };
+
+ case DumpKind::DumpModsToDisk:
+ return [](std::unique_ptr<Module> M) {
+ std::error_code EC;
+ raw_fd_ostream Out(M->getModuleIdentifier() + ".ll", EC,
+ sys::fs::F_Text);
+ if (EC) {
+ errs() << "Couldn't open " << M->getModuleIdentifier()
+ << " for dumping.\nError:" << EC.message() << "\n";
+ exit(1);
+ }
+ Out << *M;
+ return std::move(M);
+ };
+ }
+ llvm_unreachable("Unknown DumpKind");
+}
+
int llvm::runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]) {
- OrcLazyJIT J(std::unique_ptr<TargetMachine>(EngineBuilder().selectTarget()),
- getGlobalContext());
+ // Add the program's symbols into the JIT's search space.
+ if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr)) {
+ errs() << "Error loading program symbols.\n";
+ return 1;
+ }
+
+ // Grab a target machine and try to build a factory function for the
+ // target-specific Orc callback manager.
+ auto TM = std::unique_ptr<TargetMachine>(EngineBuilder().selectTarget());
+ auto &Context = getGlobalContext();
+ auto CallbackMgrBuilder =
+ OrcLazyJIT::createCallbackManagerBuilder(Triple(TM->getTargetTriple()));
- if (!J.Ok()) {
- errs() << "Could not construct JIT.\n";
+ // If we couldn't build the factory function then there must not be a callback
+ // manager for this target. Bail out.
+ if (!CallbackMgrBuilder) {
+ errs() << "No callback manager available for target '"
+ << TM->getTargetTriple() << "'.\n";
return 1;
}
+ // Everything looks good. Build the JIT.
+ OrcLazyJIT J(std::move(TM), Context, CallbackMgrBuilder);
+
+ // Add the module, look up main and run it.
auto MainHandle = J.addModule(std::move(M));
auto MainSym = J.findSymbolIn(MainHandle, "main");
@@ -46,8 +142,6 @@ int llvm::runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]) {
}
typedef int (*MainFnPtr)(int, char*[]);
- auto Main = reinterpret_cast<MainFnPtr>(
- static_cast<uintptr_t>(MainSym.getAddress()));
-
+ auto Main = OrcLazyJIT::fromTargetAddress<MainFnPtr>(MainSym.getAddress());
return Main(ArgC, ArgV);
}
diff --git a/tools/lli/OrcLazyJIT.h b/tools/lli/OrcLazyJIT.h
index 76e1ac6..2b2db6e 100644
--- a/tools/lli/OrcLazyJIT.h
+++ b/tools/lli/OrcLazyJIT.h
@@ -18,9 +18,12 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/IR/LLVMContext.h"
namespace llvm {
@@ -31,30 +34,96 @@ public:
typedef orc::JITCompileCallbackManagerBase CompileCallbackMgr;
typedef orc::ObjectLinkingLayer<> ObjLayerT;
typedef orc::IRCompileLayer<ObjLayerT> CompileLayerT;
- typedef orc::LazyEmittingLayer<CompileLayerT> LazyEmitLayerT;
+ typedef std::function<std::unique_ptr<Module>(std::unique_ptr<Module>)>
+ TransformFtor;
+ typedef orc::IRTransformLayer<CompileLayerT, TransformFtor> IRDumpLayerT;
+ typedef orc::LazyEmittingLayer<IRDumpLayerT> LazyEmitLayerT;
typedef orc::CompileOnDemandLayer<LazyEmitLayerT,
CompileCallbackMgr> CODLayerT;
typedef CODLayerT::ModuleSetHandleT ModuleHandleT;
- OrcLazyJIT(std::unique_ptr<TargetMachine> TM, LLVMContext &Context)
- : Error(false), TM(std::move(TM)),
+ typedef std::function<
+ std::unique_ptr<CompileCallbackMgr>(IRDumpLayerT&,
+ RuntimeDyld::MemoryManager&,
+ LLVMContext&)>
+ CallbackManagerBuilder;
+
+ static CallbackManagerBuilder createCallbackManagerBuilder(Triple T);
+
+ OrcLazyJIT(std::unique_ptr<TargetMachine> TM, LLVMContext &Context,
+ CallbackManagerBuilder &BuildCallbackMgr)
+ : TM(std::move(TM)),
Mang(this->TM->getDataLayout()),
- ObjectLayer([](){ return llvm::make_unique<SectionMemoryManager>(); }),
+ ObjectLayer(),
CompileLayer(ObjectLayer, orc::SimpleCompiler(*this->TM)),
- LazyEmitLayer(CompileLayer),
- CCMgr(createCallbackMgr(Triple(this->TM->getTargetTriple()), Context)),
- CODLayer(LazyEmitLayer, *CCMgr) { }
+ IRDumpLayer(CompileLayer, createDebugDumper()),
+ LazyEmitLayer(IRDumpLayer),
+ CCMgr(BuildCallbackMgr(IRDumpLayer, CCMgrMemMgr, Context)),
+ CODLayer(LazyEmitLayer, *CCMgr),
+ CXXRuntimeOverrides([this](const std::string &S) { return mangle(S); }) {}
+
+ ~OrcLazyJIT() {
+ // Run any destructors registered with __cxa_atexit.
+ CXXRuntimeOverrides.runDestructors();
+ // Run any IR destructors.
+ for (auto &DtorRunner : IRStaticDestructorRunners)
+ DtorRunner.runViaLayer(CODLayer);
+ }
- bool Ok() const { return !Error; }
+ template <typename PtrTy>
+ static PtrTy fromTargetAddress(orc::TargetAddress Addr) {
+ return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr));
+ }
ModuleHandleT addModule(std::unique_ptr<Module> M) {
// Attach a data-layout if one isn't already present.
if (M->getDataLayout().isDefault())
M->setDataLayout(*TM->getDataLayout());
+ // Record the static constructors and destructors. We have to do this before
+ // we hand over ownership of the module to the JIT.
+ std::vector<std::string> CtorNames, DtorNames;
+ for (auto Ctor : orc::getConstructors(*M))
+ CtorNames.push_back(mangle(Ctor.Func->getName()));
+ for (auto Dtor : orc::getDestructors(*M))
+ DtorNames.push_back(mangle(Dtor.Func->getName()));
+
+ // Symbol resolution order:
+ // 1) Search the JIT symbols.
+ // 2) Check for C++ runtime overrides.
+ // 3) Search the host process (LLI)'s symbol table.
+ auto Resolver =
+ orc::createLambdaResolver(
+ [this](const std::string &Name) {
+
+ if (auto Sym = CODLayer.findSymbol(Name, true))
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
+
+ if (auto Sym = CXXRuntimeOverrides.searchOverrides(Name))
+ return Sym;
+
+ if (auto Addr = RTDyldMemoryManager::getSymbolAddressInProcess(Name))
+ return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported);
+
+ return RuntimeDyld::SymbolInfo(nullptr);
+ },
+ [](const std::string &Name) { return RuntimeDyld::SymbolInfo(nullptr); }
+ );
+
+ // Add the module to the JIT.
std::vector<std::unique_ptr<Module>> S;
S.push_back(std::move(M));
- return CODLayer.addModuleSet(std::move(S));
+ auto H = CODLayer.addModuleSet(std::move(S), nullptr, std::move(Resolver));
+
+ // Run the static constructors, and save the static destructor runner for
+ // execution when the JIT is torn down.
+ orc::CtorDtorRunner<CODLayerT> CtorRunner(std::move(CtorNames), H);
+ CtorRunner.runViaLayer(CODLayer);
+
+ IRStaticDestructorRunners.push_back(
+ orc::CtorDtorRunner<CODLayerT>(std::move(DtorNames), H));
+
+ return H;
}
orc::JITSymbol findSymbol(const std::string &Name) {
@@ -67,9 +136,6 @@ public:
private:
- std::unique_ptr<CompileCallbackMgr>
- createCallbackMgr(Triple T, LLVMContext &Context);
-
std::string mangle(const std::string &Name) {
std::string MangledName;
{
@@ -79,15 +145,21 @@ private:
return MangledName;
}
- bool Error;
+ static TransformFtor createDebugDumper();
+
std::unique_ptr<TargetMachine> TM;
Mangler Mang;
+ SectionMemoryManager CCMgrMemMgr;
ObjLayerT ObjectLayer;
CompileLayerT CompileLayer;
+ IRDumpLayerT IRDumpLayer;
LazyEmitLayerT LazyEmitLayer;
std::unique_ptr<CompileCallbackMgr> CCMgr;
CODLayerT CODLayer;
+
+ orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
+ std::vector<orc::CtorDtorRunner<CODLayerT>> IRStaticDestructorRunners;
};
int runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]);
diff --git a/tools/lli/RemoteMemoryManager.h b/tools/lli/RemoteMemoryManager.h
index 895bcda..5733fa5 100644
--- a/tools/lli/RemoteMemoryManager.h
+++ b/tools/lli/RemoteMemoryManager.h
@@ -64,7 +64,7 @@ private:
public:
RemoteMemoryManager() : Target(nullptr) {}
- virtual ~RemoteMemoryManager();
+ ~RemoteMemoryManager() override;
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID,
diff --git a/tools/lli/RemoteTargetExternal.h b/tools/lli/RemoteTargetExternal.h
index bb621f5..afe8570 100644
--- a/tools/lli/RemoteTargetExternal.h
+++ b/tools/lli/RemoteTargetExternal.h
@@ -106,7 +106,7 @@ public:
void stop() override;
RemoteTargetExternal(std::string &Name) : RemoteTarget(), ChildName(Name) {}
- virtual ~RemoteTargetExternal() {}
+ ~RemoteTargetExternal() override {}
private:
std::string ChildName;
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 47ce2c0..c1e3522 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -271,7 +271,7 @@ public:
this->CacheDir[this->CacheDir.size() - 1] != '/')
this->CacheDir += '/';
}
- virtual ~LLIObjectCache() {}
+ ~LLIObjectCache() override {}
void notifyObjectCompiled(const Module *M, MemoryBufferRef Obj) override {
const std::string ModuleID = M->getModuleIdentifier();
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index 5ccf505..c6b3f93 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -51,6 +51,11 @@ static cl::opt<bool>
DisableVerify("disable-verify", cl::Hidden,
cl::desc("Do not run verifier on input LLVM (dangerous!)"));
+static cl::opt<bool> PreserveBitcodeUseListOrder(
+ "preserve-bc-uselistorder",
+ cl::desc("Preserve use-list order when writing LLVM bitcode."),
+ cl::init(true), cl::Hidden);
+
static void WriteOutputFile(const Module *M) {
// Infer the output filename if needed.
if (OutputFilename.empty()) {
@@ -78,7 +83,7 @@ static void WriteOutputFile(const Module *M) {
}
if (Force || !CheckBitcodeOutputToConsole(Out->os(), true))
- WriteBitcodeToFile(M, Out->os());
+ WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder);
// Declare success.
Out->keep();
diff --git a/tools/llvm-cxxdump/llvm-cxxdump.cpp b/tools/llvm-cxxdump/llvm-cxxdump.cpp
index aeb977a..447d55a 100644
--- a/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -355,7 +355,7 @@ static void dumpCXXData(const ObjectFile *Obj) {
StringRef SymName = VFTableEntry.second;
outs() << VFTableName << '[' << Offset << "]: " << SymName << '\n';
}
- for (const std::pair<StringRef, ArrayRef<little32_t>> &VBTable : VBTables) {
+ for (const auto &VBTable : VBTables) {
StringRef VBTableName = VBTable.first;
uint32_t Idx = 0;
for (little32_t Offset : VBTable.second) {
@@ -363,7 +363,7 @@ static void dumpCXXData(const ObjectFile *Obj) {
Idx += sizeof(Offset);
}
}
- for (const std::pair<StringRef, CompleteObjectLocator> &COLPair : COLs) {
+ for (const auto &COLPair : COLs) {
StringRef COLName = COLPair.first;
const CompleteObjectLocator &COL = COLPair.second;
outs() << COLName << "[IsImageRelative]: " << COL.Data[0] << '\n';
@@ -373,7 +373,7 @@ static void dumpCXXData(const ObjectFile *Obj) {
outs() << COLName << "[ClassHierarchyDescriptor]: " << COL.Symbols[1]
<< '\n';
}
- for (const std::pair<StringRef, ClassHierarchyDescriptor> &CHDPair : CHDs) {
+ for (const auto &CHDPair : CHDs) {
StringRef CHDName = CHDPair.first;
const ClassHierarchyDescriptor &CHD = CHDPair.second;
outs() << CHDName << "[AlwaysZero]: " << CHD.Data[0] << '\n';
@@ -381,14 +381,13 @@ static void dumpCXXData(const ObjectFile *Obj) {
outs() << CHDName << "[NumClasses]: " << CHD.Data[2] << '\n';
outs() << CHDName << "[BaseClassArray]: " << CHD.Symbols[0] << '\n';
}
- for (const std::pair<std::pair<StringRef, uint64_t>, StringRef> &BCAEntry :
- BCAEntries) {
+ for (const auto &BCAEntry : BCAEntries) {
StringRef BCAName = BCAEntry.first.first;
uint64_t Offset = BCAEntry.first.second;
StringRef SymName = BCAEntry.second;
outs() << BCAName << '[' << Offset << "]: " << SymName << '\n';
}
- for (const std::pair<StringRef, BaseClassDescriptor> &BCDPair : BCDs) {
+ for (const auto &BCDPair : BCDs) {
StringRef BCDName = BCDPair.first;
const BaseClassDescriptor &BCD = BCDPair.second;
outs() << BCDName << "[TypeDescriptor]: " << BCD.Symbols[0] << '\n';
@@ -400,7 +399,7 @@ static void dumpCXXData(const ObjectFile *Obj) {
outs() << BCDName << "[ClassHierarchyDescriptor]: " << BCD.Symbols[1]
<< '\n';
}
- for (const std::pair<StringRef, TypeDescriptor> &TDPair : TDs) {
+ for (const auto &TDPair : TDs) {
StringRef TDName = TDPair.first;
const TypeDescriptor &TD = TDPair.second;
outs() << TDName << "[VFPtr]: " << TD.Symbols[0] << '\n';
@@ -410,7 +409,7 @@ static void dumpCXXData(const ObjectFile *Obj) {
/*UseHexEscapes=*/true)
<< '\n';
}
- for (const std::pair<StringRef, ThrowInfo> &TIPair : TIs) {
+ for (const auto &TIPair : TIs) {
StringRef TIName = TIPair.first;
const ThrowInfo &TI = TIPair.second;
auto dumpThrowInfoFlag = [&](const char *Name, uint32_t Flag) {
@@ -429,7 +428,7 @@ static void dumpCXXData(const ObjectFile *Obj) {
dumpThrowInfoSymbol("ForwardCompat", 8);
dumpThrowInfoSymbol("CatchableTypeArray", 12);
}
- for (const std::pair<StringRef, CatchableTypeArray> &CTAPair : CTAs) {
+ for (const auto &CTAPair : CTAs) {
StringRef CTAName = CTAPair.first;
const CatchableTypeArray &CTA = CTAPair.second;
@@ -441,7 +440,7 @@ static void dumpCXXData(const ObjectFile *Obj) {
I != E; ++I)
outs() << CTAName << '[' << Idx++ << "]: " << I->second << '\n';
}
- for (const std::pair<StringRef, CatchableType> &CTPair : CTs) {
+ for (const auto &CTPair : CTs) {
StringRef CTName = CTPair.first;
const CatchableType &CT = CTPair.second;
auto dumpCatchableTypeFlag = [&](const char *Name, uint32_t Flag) {
@@ -464,14 +463,13 @@ static void dumpCXXData(const ObjectFile *Obj) {
<< "[CopyCtor]: " << (CT.Symbols[1].empty() ? "null" : CT.Symbols[1])
<< '\n';
}
- for (const std::pair<std::pair<StringRef, uint64_t>, StringRef> &VTTPair :
- VTTEntries) {
+ for (const auto &VTTPair : VTTEntries) {
StringRef VTTName = VTTPair.first.first;
uint64_t VTTOffset = VTTPair.first.second;
StringRef VTTEntry = VTTPair.second;
outs() << VTTName << '[' << VTTOffset << "]: " << VTTEntry << '\n';
}
- for (const std::pair<StringRef, StringRef> &TIPair : TINames) {
+ for (const auto &TIPair : TINames) {
StringRef TIName = TIPair.first;
outs() << TIName << ": " << TIPair.second << '\n';
}
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 1c3a9ce..1ff2302 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -54,16 +54,18 @@ static cl::opt<bool>
ShowAnnotations("show-annotations",
cl::desc("Add informational comments to the .ll file"));
+static cl::opt<bool> PreserveAssemblyUseListOrder(
+ "preserve-ll-uselistorder",
+ cl::desc("Preserve use-list order when writing LLVM assembly."),
+ cl::init(false), cl::Hidden);
+
namespace {
static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) {
OS << DL.getLine() << ":" << DL.getCol();
- if (MDNode *N = DL.getInlinedAt(getGlobalContext())) {
- DebugLoc IDL = DebugLoc::getFromDILocation(N);
- if (!IDL.isUnknown()) {
- OS << "@";
- printDebugLoc(IDL,OS);
- }
+ if (MDLocation *IDL = DL.getInlinedAt()) {
+ OS << "@";
+ printDebugLoc(IDL, OS);
}
}
class CommentWriter : public AssemblyAnnotationWriter {
@@ -81,8 +83,7 @@ public:
OS << "; [#uses=" << V.getNumUses() << " type=" << *V.getType() << "]"; // Output # uses and type
}
if (const Instruction *I = dyn_cast<Instruction>(&V)) {
- const DebugLoc &DL = I->getDebugLoc();
- if (!DL.isUnknown()) {
+ if (const DebugLoc &DL = I->getDebugLoc()) {
if (!Padded) {
OS.PadToColumn(50);
Padded = true;
@@ -98,7 +99,7 @@ public:
OS.PadToColumn(50);
OS << ";";
}
- OS << " [debug variable = " << Var.getName() << "]";
+ OS << " [debug variable = " << Var->getName() << "]";
}
else if (const DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
DIVariable Var(DVI->getVariable());
@@ -106,7 +107,7 @@ public:
OS.PadToColumn(50);
OS << ";";
}
- OS << " [debug variable = " << Var.getName() << "]";
+ OS << " [debug variable = " << Var->getName() << "]";
}
}
}
@@ -193,7 +194,7 @@ int main(int argc, char **argv) {
// All that llvm-dis does is write the assembly to a file.
if (!DontPrint)
- M->print(Out->os(), Annotator.get());
+ M->print(Out->os(), Annotator.get(), PreserveAssemblyUseListOrder);
// Declare success.
Out->keep();
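
The llvm-dis hunks above track the updated debug-location API: a DebugLoc now tests as a boolean instead of exposing isUnknown(), getInlinedAt() hands back an MDLocation* directly, and DIVariable names are reached through operator->. A small sketch of the resulting printer shape, not from the patch (raw_ostream stands in for the formatted stream used above):

  #include "llvm/IR/DebugInfoMetadata.h"
  #include "llvm/IR/DebugLoc.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  // Print "line:col" and recurse through any inlined-at chain.
  static void printLoc(const DebugLoc &DL, raw_ostream &OS) {
    if (!DL)                                   // replaces the old isUnknown()
      return;
    OS << DL.getLine() << ":" << DL.getCol();
    if (MDLocation *IA = DL.getInlinedAt()) {  // no getFromDILocation() hop
      OS << "@";
      printLoc(IA, OS);                        // MDLocation* converts to DebugLoc
    }
  }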
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index 8bfd319..936496c 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -90,6 +90,16 @@ static cl::opt<bool>
OutputAssembly("S",
cl::desc("Write output as LLVM assembly"), cl::Hidden);
+static cl::opt<bool> PreserveBitcodeUseListOrder(
+ "preserve-bc-uselistorder",
+ cl::desc("Preserve use-list order when writing LLVM bitcode."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool> PreserveAssemblyUseListOrder(
+ "preserve-ll-uselistorder",
+ cl::desc("Preserve use-list order when writing LLVM assembly."),
+ cl::init(false), cl::Hidden);
+
int main(int argc, char **argv) {
// Print a stack trace if we signal out.
sys::PrintStackTraceOnErrorSignal();
@@ -264,9 +274,10 @@ int main(int argc, char **argv) {
}
if (OutputAssembly)
- Passes.add(createPrintModulePass(Out.os()));
+ Passes.add(
+ createPrintModulePass(Out.os(), "", PreserveAssemblyUseListOrder));
else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true))
- Passes.add(createBitcodeWriterPass(Out.os()));
+ Passes.add(createBitcodeWriterPass(Out.os(), PreserveBitcodeUseListOrder));
Passes.run(*M.get());
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index e52191a..bcefc0b 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -15,6 +15,7 @@
#include "llvm/Linker/Linker.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
@@ -58,6 +59,16 @@ static cl::opt<bool>
SuppressWarnings("suppress-warnings", cl::desc("Suppress all linking warnings"),
cl::init(false));
+static cl::opt<bool> PreserveBitcodeUseListOrder(
+ "preserve-bc-uselistorder",
+ cl::desc("Preserve use-list order when writing LLVM bitcode."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool> PreserveAssemblyUseListOrder(
+ "preserve-ll-uselistorder",
+ cl::desc("Preserve use-list order when writing LLVM assembly."),
+ cl::init(false), cl::Hidden);
+
// Read the specified bitcode file in and return it. This routine searches the
// link path for the specified file to try to find it...
//
@@ -69,6 +80,9 @@ loadFile(const char *argv0, const std::string &FN, LLVMContext &Context) {
if (!Result)
Err.print(argv0, errs());
+ Result->materializeMetadata();
+ UpgradeDebugInfo(*Result);
+
return Result;
}
@@ -112,9 +126,9 @@ int main(int argc, char **argv) {
return 1;
}
- if (verifyModule(*M)) {
- errs() << argv[0] << ": input module '" << InputFilenames[i]
- << "' is broken!\n";
+ if (verifyModule(*M, &errs())) {
+ errs() << argv[0] << ": " << InputFilenames[i]
+ << ": error: input module is broken!\n";
return 1;
}
@@ -133,16 +147,16 @@ int main(int argc, char **argv) {
return 1;
}
- if (verifyModule(*Composite)) {
- errs() << argv[0] << ": linked module is broken!\n";
+ if (verifyModule(*Composite, &errs())) {
+ errs() << argv[0] << ": error: linked module is broken!\n";
return 1;
}
if (Verbose) errs() << "Writing bitcode...\n";
if (OutputAssembly) {
- Out.os() << *Composite;
+ Composite->print(Out.os(), nullptr, PreserveAssemblyUseListOrder);
} else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true))
- WriteBitcodeToFile(Composite.get(), Out.os());
+ WriteBitcodeToFile(Composite.get(), Out.os(), PreserveBitcodeUseListOrder);
// Declare success.
Out.keep();
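
A minimal sketch of the load-then-verify sequence llvm-link now follows; loadAndCheck is a hypothetical helper name, and the calls mirror the ones added above (materializeMetadata and UpgradeDebugInfo before use, verifyModule with a diagnostic stream). Assumes IRReader/IRReader.h, IR/Verifier.h, and IR/AutoUpgrade.h:

    static std::unique_ptr<Module> loadAndCheck(const char *argv0, StringRef Path,
                                                LLVMContext &Ctx) {
      SMDiagnostic Err;
      std::unique_ptr<Module> M = parseIRFile(Path, Err, Ctx);
      if (!M) {
        Err.print(argv0, errs());
        return nullptr;
      }
      M->materializeMetadata();      // pull in lazily-loaded metadata
      UpgradeDebugInfo(*M);          // auto-upgrade stale debug-info forms
      if (verifyModule(*M, &errs())) // verifier errors go to stderr now
        return nullptr;
      return M;
    }
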
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 4f9b6fc..6a8b493 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -439,7 +439,8 @@ int main(int argc, char **argv) {
if (!Out)
return 1;
- formatted_raw_ostream FOS(Out->os());
+ std::unique_ptr<buffer_ostream> BOS;
+ raw_pwrite_stream *OS = &Out->os();
std::unique_ptr<MCStreamer> Str;
std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
@@ -448,8 +449,8 @@ int main(int argc, char **argv) {
MCInstPrinter *IP = nullptr;
if (FileType == OFT_AssemblyFile) {
- IP =
- TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *MCII, *MRI, *STI);
+ IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant,
+ *MAI, *MCII, *MRI);
// Set the display preference for hex vs. decimal immediates.
IP->setPrintImmHex(PrintImmHex);
@@ -461,17 +462,24 @@ int main(int argc, char **argv) {
CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, MCPU);
}
- Str.reset(TheTarget->createAsmStreamer(Ctx, FOS, /*asmverbose*/ true,
- /*useDwarfDirectory*/ true, IP, CE,
- MAB, ShowInst));
+ auto FOut = llvm::make_unique<formatted_raw_ostream>(*OS);
+ Str.reset(TheTarget->createAsmStreamer(
+ Ctx, std::move(FOut), /*asmverbose*/ true,
+ /*useDwarfDirectory*/ true, IP, CE, MAB, ShowInst));
} else if (FileType == OFT_Null) {
Str.reset(TheTarget->createNullStreamer(Ctx));
} else {
assert(FileType == OFT_ObjectFile && "Invalid file type!");
+
+ if (!Out->os().supportsSeeking()) {
+ BOS = make_unique<buffer_ostream>(Out->os());
+ OS = BOS.get();
+ }
+
MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, MCPU);
- Str.reset(TheTarget->createMCObjectStreamer(TheTriple, Ctx, *MAB, FOS, CE,
+ Str.reset(TheTarget->createMCObjectStreamer(TheTriple, Ctx, *MAB, *OS, CE,
*STI, RelaxAll,
/*DWARFMustBeAtTheEnd*/ false));
if (NoExecStack)
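
A minimal sketch of the seekable-stream selection the hunks above add to llvm-mc: object emission needs a raw_pwrite_stream, so a buffer_ostream is interposed when the real output cannot seek. Names follow the patch; the surrounding tool state is assumed:

    std::unique_ptr<buffer_ostream> BOS;
    raw_pwrite_stream *OS = &Out->os();
    if (!Out->os().supportsSeeking()) {
      // Buffer in memory; the bytes are flushed to Out->os() when BOS is destroyed.
      BOS = llvm::make_unique<buffer_ostream>(Out->os());
      OS = BOS.get();
    }
    // *OS is then handed to createMCObjectStreamer, or wrapped in a
    // formatted_raw_ostream for the asm streamer.
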
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 86504e6..a491a37 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -99,6 +99,9 @@ cl::list<std::string>
cl::desc("Prints the specified segment,section for "
"Mach-O objects (requires -macho)"));
+cl::opt<bool> llvm::Raw("raw",
+ cl::desc("Have -section dump the raw binary contents"));
+
cl::opt<bool>
llvm::InfoPlist("info-plist",
cl::desc("Print the info plist section as strings for "
@@ -119,6 +122,11 @@ cl::opt<bool>
cl::desc("Print the info for Mach-O objects in "
"non-verbose or numeric form (requires -macho)"));
+cl::opt<bool>
+ llvm::ObjcMetaData("objc-meta-data",
+ cl::desc("Print the Objective-C runtime meta data for "
+ "Mach-O files (requires -macho)"));
+
cl::opt<std::string> llvm::DisSymName(
"dis-symname",
cl::desc("disassemble just this symbol's instructions (requires -macho)"));
@@ -127,7 +135,6 @@ static cl::opt<bool> NoSymbolicOperands(
"no-symbolic-operands",
cl::desc("do not symbolicate operands when disassembling (requires -macho)"));
-
static cl::list<std::string>
ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"),
cl::ZeroOrMore);
@@ -605,7 +612,8 @@ static void CreateSymbolAddressMap(MachOObjectFile *O,
Symbol.getAddress(Address);
StringRef SymName;
Symbol.getName(SymName);
- (*AddrMap)[Address] = SymName;
+ if (!SymName.startswith(".objc"))
+ (*AddrMap)[Address] = SymName;
}
}
}
@@ -1017,6 +1025,8 @@ static void DumpRawSectionContents(MachOObjectFile *O, const char *sect,
static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
StringRef DisSegName, StringRef DisSectName);
+static void DumpProtocolSection(MachOObjectFile *O, const char *sect,
+ uint32_t size, uint32_t addr);
static void DumpSectionContents(StringRef Filename, MachOObjectFile *O,
bool verbose) {
@@ -1043,8 +1053,7 @@ static void DumpSectionContents(StringRef Filename, MachOObjectFile *O,
StringRef SegName = O->getSectionFinalSegmentName(Ref);
if ((DumpSegName.empty() || SegName == DumpSegName) &&
(SectName == DumpSectName)) {
- outs() << "Contents of (" << SegName << "," << SectName
- << ") section\n";
+
uint32_t section_flags;
if (O->is64Bit()) {
const MachO::section_64 Sec = O->getSection64(Ref);
@@ -1062,6 +1071,14 @@ static void DumpSectionContents(StringRef Filename, MachOObjectFile *O,
uint32_t sect_size = BytesStr.size();
uint64_t sect_addr = Section.getAddress();
+ if (Raw) {
+ outs().write(BytesStr.data(), BytesStr.size());
+ continue;
+ }
+
+ outs() << "Contents of (" << SegName << "," << SectName
+ << ") section\n";
+
if (verbose) {
if ((section_flags & MachO::S_ATTR_PURE_INSTRUCTIONS) ||
(section_flags & MachO::S_ATTR_SOME_INSTRUCTIONS)) {
@@ -1072,6 +1089,10 @@ static void DumpSectionContents(StringRef Filename, MachOObjectFile *O,
outs() << sect;
continue;
}
+ if (SegName == "__OBJC" && SectName == "__protocol") {
+ DumpProtocolSection(O, sect, sect_size, sect_addr);
+ continue;
+ }
switch (section_type) {
case MachO::S_REGULAR:
DumpRawSectionContents(O, sect, sect_size, sect_addr);
@@ -1089,8 +1110,8 @@ static void DumpSectionContents(StringRef Filename, MachOObjectFile *O,
DumpLiteral8Section(O, sect, sect_size, sect_addr, !NoLeadingAddr);
break;
case MachO::S_16BYTE_LITERALS:
- DumpLiteral16Section(O, sect, sect_size, sect_addr, !NoLeadingAddr);
- break;
+ DumpLiteral16Section(O, sect, sect_size, sect_addr, !NoLeadingAddr);
+ break;
case MachO::S_LITERAL_POINTERS:
DumpLiteralPointerSection(O, Section, sect, sect_size, sect_addr,
!NoLeadingAddr);
@@ -1169,6 +1190,8 @@ static bool checkMachOAndArchFlags(ObjectFile *O, StringRef Filename) {
return true;
}
+static void printObjcMetaData(MachOObjectFile *O, bool verbose);
+
// ProcessMachO() is passed a single opened Mach-O file, which may be an
// archive member and/or in a slice of a universal file. It prints the
// file name and header info and then processes it according to the
@@ -1181,7 +1204,8 @@ static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF,
// UniversalHeaders or ArchiveHeaders.
if (Disassemble || PrivateHeaders || ExportsTrie || Rebase || Bind ||
LazyBind || WeakBind || IndirectSymbols || DataInCode || LinkOptHints ||
- DylibsUsed || DylibId || DumpSections.size() != 0) {
+ DylibsUsed || DylibId || ObjcMetaData ||
+ (DumpSections.size() != 0 && !Raw)) {
outs() << Filename;
if (!ArchiveMemberName.empty())
outs() << '(' << ArchiveMemberName << ')';
@@ -1218,6 +1242,8 @@ static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF,
printMachOUnwindInfo(MachOOF);
if (PrivateHeaders)
printMachOFileHeader(MachOOF);
+ if (ObjcMetaData)
+ printObjcMetaData(MachOOF, !NonVerbose);
if (ExportsTrie)
printExportsTrie(MachOOF);
if (Rebase)
@@ -2385,13 +2411,22 @@ static uint64_t GuessPointerPointer(uint64_t ReferenceValue,
// section nullptr is returned.
static const char *get_pointer_64(uint64_t Address, uint32_t &offset,
uint32_t &left, SectionRef &S,
- DisassembleInfo *info) {
+ DisassembleInfo *info,
+ bool objc_only = false) {
offset = 0;
left = 0;
S = SectionRef();
for (unsigned SectIdx = 0; SectIdx != info->Sections->size(); SectIdx++) {
uint64_t SectAddress = ((*(info->Sections))[SectIdx]).getAddress();
uint64_t SectSize = ((*(info->Sections))[SectIdx]).getSize();
+ if (objc_only) {
+ StringRef SectName;
+ ((*(info->Sections))[SectIdx]).getName(SectName);
+ DataRefImpl Ref = ((*(info->Sections))[SectIdx]).getRawDataRefImpl();
+ StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
+ if (SegName != "__OBJC" && SectName != "__cstring")
+ continue;
+ }
if (Address >= SectAddress && Address < SectAddress + SectSize) {
S = (*(info->Sections))[SectIdx];
offset = Address - SectAddress;
@@ -2404,11 +2439,22 @@ static const char *get_pointer_64(uint64_t Address, uint32_t &offset,
return nullptr;
}
+static const char *get_pointer_32(uint32_t Address, uint32_t &offset,
+ uint32_t &left, SectionRef &S,
+ DisassembleInfo *info,
+ bool objc_only = false) {
+ return get_pointer_64(Address, offset, left, S, info, objc_only);
+}
+
// get_symbol_64() returns the name of a symbol (or nullptr) and the address of
// the symbol indirectly through n_value. Based on the relocation information
// for the specified section offset in the specified section reference.
-static const char *get_symbol_64(uint32_t sect_offset, SectionRef S,
- DisassembleInfo *info, uint64_t &n_value) {
+// If no relocation information is found and a non-zero ReferenceValue for the
+// symbol is passed, look up that address in the info's AddrMap.
+static const char *
+get_symbol_64(uint32_t sect_offset, SectionRef S, DisassembleInfo *info,
+ uint64_t &n_value,
+ uint64_t ReferenceValue = UnknownAddressOrSize) {
n_value = 0;
if (!info->verbose)
return nullptr;
@@ -2442,6 +2488,8 @@ static const char *get_symbol_64(uint32_t sect_offset, SectionRef S,
const char *SymbolName = nullptr;
if (reloc_found && isExtern) {
Symbol.getAddress(n_value);
+ if (n_value == UnknownAddressOrSize)
+ n_value = 0;
StringRef name;
Symbol.getName(name);
if (!name.empty()) {
@@ -2459,17 +2507,21 @@ static const char *get_symbol_64(uint32_t sect_offset, SectionRef S,
//
// NOTE: need to add passing the database_offset to this routine.
- // TODO: We did not find an external relocation entry so look up the
- // ReferenceValue as an address of a symbol and if found return that symbol's
- // name.
- //
- // NOTE: need add passing the ReferenceValue to this routine. Then that code
- // would simply be this:
- // SymbolName = GuessSymbolName(ReferenceValue, info->AddrMap);
+ // We did not find an external relocation entry so look up the ReferenceValue
+ // as an address of a symbol and if found return that symbol's name.
+ if (ReferenceValue != UnknownAddressOrSize)
+ SymbolName = GuessSymbolName(ReferenceValue, info->AddrMap);
return SymbolName;
}
+static const char *get_symbol_32(uint32_t sect_offset, SectionRef S,
+ DisassembleInfo *info,
+ uint32_t ReferenceValue) {
+ uint64_t n_value64;
+ return get_symbol_64(sect_offset, S, info, n_value64, ReferenceValue);
+}
+
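
The 32-bit helpers above simply forward to the 64-bit versions, and the new ReferenceValue parameter lets get_symbol_64 fall back to the address map built by CreateSymbolAddressMap when no external relocation matches. A minimal caller sketch, assuming a populated DisassembleInfo named info and a pointer value p read out of an Objective-C metadata section:

    uint64_t n_value = 0;
    // S/sect_offset locate the field being resolved; p is its raw value.
    const char *Name = get_symbol_64(sect_offset, S, &info, n_value, p);
    // Name is the relocation's symbol if one exists, otherwise the symbol
    // whose address equals p in info.AddrMap, otherwise nullptr.
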
// These are structs in the Objective-C meta data and are read to produce the
// comments for disassembly. While these are part of the ABI they are not
// public definitions, so they are here and not in include/llvm/Support/MachO.h.
@@ -2491,6 +2543,14 @@ struct class64_t {
uint64_t data; // class_ro64_t * (64-bit pointer)
};
+struct class32_t {
+ uint32_t isa; /* class32_t * (32-bit pointer) */
+ uint32_t superclass; /* class32_t * (32-bit pointer) */
+ uint32_t cache; /* Cache (32-bit pointer) */
+ uint32_t vtable; /* IMP * (32-bit pointer) */
+ uint32_t data; /* class_ro32_t * (32-bit pointer) */
+};
+
struct class_ro64_t {
uint32_t flags;
uint32_t instanceStart;
@@ -2505,6 +2565,276 @@ struct class_ro64_t {
uint64_t baseProperties; // const struct objc_property_list (64-bit pointer)
};
+struct class_ro32_t {
+ uint32_t flags;
+ uint32_t instanceStart;
+ uint32_t instanceSize;
+ uint32_t ivarLayout; /* const uint8_t * (32-bit pointer) */
+ uint32_t name; /* const char * (32-bit pointer) */
+ uint32_t baseMethods; /* const method_list_t * (32-bit pointer) */
+ uint32_t baseProtocols; /* const protocol_list_t * (32-bit pointer) */
+ uint32_t ivars; /* const ivar_list_t * (32-bit pointer) */
+ uint32_t weakIvarLayout; /* const uint8_t * (32-bit pointer) */
+ uint32_t baseProperties; /* const struct objc_property_list *
+ (32-bit pointer) */
+};
+
+/* Values for class_ro{64,32}_t->flags */
+#define RO_META (1 << 0)
+#define RO_ROOT (1 << 1)
+#define RO_HAS_CXX_STRUCTORS (1 << 2)
+
+struct method_list64_t {
+ uint32_t entsize;
+ uint32_t count;
+ /* struct method64_t first; These structures follow inline */
+};
+
+struct method_list32_t {
+ uint32_t entsize;
+ uint32_t count;
+ /* struct method32_t first; These structures follow inline */
+};
+
+struct method64_t {
+ uint64_t name; /* SEL (64-bit pointer) */
+ uint64_t types; /* const char * (64-bit pointer) */
+ uint64_t imp; /* IMP (64-bit pointer) */
+};
+
+struct method32_t {
+ uint32_t name; /* SEL (32-bit pointer) */
+ uint32_t types; /* const char * (32-bit pointer) */
+ uint32_t imp; /* IMP (32-bit pointer) */
+};
+
+struct protocol_list64_t {
+ uint64_t count; /* uintptr_t (a 64-bit value) */
+ /* struct protocol64_t * list[0]; These pointers follow inline */
+};
+
+struct protocol_list32_t {
+ uint32_t count; /* uintptr_t (a 32-bit value) */
+ /* struct protocol32_t * list[0]; These pointers follow inline */
+};
+
+struct protocol64_t {
+ uint64_t isa; /* id * (64-bit pointer) */
+ uint64_t name; /* const char * (64-bit pointer) */
+ uint64_t protocols; /* struct protocol_list64_t *
+ (64-bit pointer) */
+ uint64_t instanceMethods; /* method_list_t * (64-bit pointer) */
+ uint64_t classMethods; /* method_list_t * (64-bit pointer) */
+ uint64_t optionalInstanceMethods; /* method_list_t * (64-bit pointer) */
+ uint64_t optionalClassMethods; /* method_list_t * (64-bit pointer) */
+ uint64_t instanceProperties; /* struct objc_property_list *
+ (64-bit pointer) */
+};
+
+struct protocol32_t {
+ uint32_t isa; /* id * (32-bit pointer) */
+ uint32_t name; /* const char * (32-bit pointer) */
+ uint32_t protocols; /* struct protocol_list_t *
+ (32-bit pointer) */
+ uint32_t instanceMethods; /* method_list_t * (32-bit pointer) */
+ uint32_t classMethods; /* method_list_t * (32-bit pointer) */
+ uint32_t optionalInstanceMethods; /* method_list_t * (32-bit pointer) */
+ uint32_t optionalClassMethods; /* method_list_t * (32-bit pointer) */
+ uint32_t instanceProperties; /* struct objc_property_list *
+ (32-bit pointer) */
+};
+
+struct ivar_list64_t {
+ uint32_t entsize;
+ uint32_t count;
+ /* struct ivar64_t first; These structures follow inline */
+};
+
+struct ivar_list32_t {
+ uint32_t entsize;
+ uint32_t count;
+ /* struct ivar32_t first; These structures follow inline */
+};
+
+struct ivar64_t {
+ uint64_t offset; /* uintptr_t * (64-bit pointer) */
+ uint64_t name; /* const char * (64-bit pointer) */
+ uint64_t type; /* const char * (64-bit pointer) */
+ uint32_t alignment;
+ uint32_t size;
+};
+
+struct ivar32_t {
+ uint32_t offset; /* uintptr_t * (32-bit pointer) */
+ uint32_t name; /* const char * (32-bit pointer) */
+ uint32_t type; /* const char * (32-bit pointer) */
+ uint32_t alignment;
+ uint32_t size;
+};
+
+struct objc_property_list64 {
+ uint32_t entsize;
+ uint32_t count;
+ /* struct objc_property64 first; These structures follow inline */
+};
+
+struct objc_property_list32 {
+ uint32_t entsize;
+ uint32_t count;
+ /* struct objc_property32 first; These structures follow inline */
+};
+
+struct objc_property64 {
+ uint64_t name; /* const char * (64-bit pointer) */
+ uint64_t attributes; /* const char * (64-bit pointer) */
+};
+
+struct objc_property32 {
+ uint32_t name; /* const char * (32-bit pointer) */
+ uint32_t attributes; /* const char * (32-bit pointer) */
+};
+
+struct category64_t {
+ uint64_t name; /* const char * (64-bit pointer) */
+ uint64_t cls; /* struct class_t * (64-bit pointer) */
+ uint64_t instanceMethods; /* struct method_list_t * (64-bit pointer) */
+ uint64_t classMethods; /* struct method_list_t * (64-bit pointer) */
+ uint64_t protocols; /* struct protocol_list_t * (64-bit pointer) */
+ uint64_t instanceProperties; /* struct objc_property_list *
+ (64-bit pointer) */
+};
+
+struct category32_t {
+ uint32_t name; /* const char * (32-bit pointer) */
+ uint32_t cls; /* struct class_t * (32-bit pointer) */
+ uint32_t instanceMethods; /* struct method_list_t * (32-bit pointer) */
+ uint32_t classMethods; /* struct method_list_t * (32-bit pointer) */
+ uint32_t protocols; /* struct protocol_list_t * (32-bit pointer) */
+ uint32_t instanceProperties; /* struct objc_property_list *
+ (32-bit pointer) */
+};
+
+struct objc_image_info64 {
+ uint32_t version;
+ uint32_t flags;
+};
+struct objc_image_info32 {
+ uint32_t version;
+ uint32_t flags;
+};
+struct imageInfo_t {
+ uint32_t version;
+ uint32_t flags;
+};
+/* masks for objc_image_info.flags */
+#define OBJC_IMAGE_IS_REPLACEMENT (1 << 0)
+#define OBJC_IMAGE_SUPPORTS_GC (1 << 1)
+
+struct message_ref64 {
+ uint64_t imp; /* IMP (64-bit pointer) */
+ uint64_t sel; /* SEL (64-bit pointer) */
+};
+
+struct message_ref32 {
+ uint32_t imp; /* IMP (32-bit pointer) */
+ uint32_t sel; /* SEL (32-bit pointer) */
+};
+
+// Objective-C 1 (32-bit only) meta data structs.
+
+struct objc_module_t {
+ uint32_t version;
+ uint32_t size;
+ uint32_t name; /* char * (32-bit pointer) */
+ uint32_t symtab; /* struct objc_symtab * (32-bit pointer) */
+};
+
+struct objc_symtab_t {
+ uint32_t sel_ref_cnt;
+ uint32_t refs; /* SEL * (32-bit pointer) */
+ uint16_t cls_def_cnt;
+ uint16_t cat_def_cnt;
+ // uint32_t defs[1]; /* void * (32-bit pointer) variable size */
+};
+
+struct objc_class_t {
+ uint32_t isa; /* struct objc_class * (32-bit pointer) */
+ uint32_t super_class; /* struct objc_class * (32-bit pointer) */
+ uint32_t name; /* const char * (32-bit pointer) */
+ int32_t version;
+ int32_t info;
+ int32_t instance_size;
+ uint32_t ivars; /* struct objc_ivar_list * (32-bit pointer) */
+ uint32_t methodLists; /* struct objc_method_list ** (32-bit pointer) */
+ uint32_t cache; /* struct objc_cache * (32-bit pointer) */
+ uint32_t protocols; /* struct objc_protocol_list * (32-bit pointer) */
+};
+
+#define CLS_GETINFO(cls, infomask) ((cls)->info & (infomask))
+// class is not a metaclass
+#define CLS_CLASS 0x1
+// class is a metaclass
+#define CLS_META 0x2
+
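
A small sketch of how the CLS_* info bits above are meant to be tested once an objc_class_t has been read and byte-swapped; isMetaClass is a hypothetical helper:

    static bool isMetaClass(const struct objc_class_t &c) {
      // CLS_GETINFO masks the info field; CLS_META marks a metaclass entry.
      return CLS_GETINFO(&c, CLS_META) != 0;
    }
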
+struct objc_category_t {
+ uint32_t category_name; /* char * (32-bit pointer) */
+ uint32_t class_name; /* char * (32-bit pointer) */
+ uint32_t instance_methods; /* struct objc_method_list * (32-bit pointer) */
+ uint32_t class_methods; /* struct objc_method_list * (32-bit pointer) */
+ uint32_t protocols; /* struct objc_protocol_list * (32-bit ptr) */
+};
+
+struct objc_ivar_t {
+ uint32_t ivar_name; /* char * (32-bit pointer) */
+ uint32_t ivar_type; /* char * (32-bit pointer) */
+ int32_t ivar_offset;
+};
+
+struct objc_ivar_list_t {
+ int32_t ivar_count;
+ // struct objc_ivar_t ivar_list[1]; /* variable length structure */
+};
+
+struct objc_method_list_t {
+ uint32_t obsolete; /* struct objc_method_list * (32-bit pointer) */
+ int32_t method_count;
+ // struct objc_method_t method_list[1]; /* variable length structure */
+};
+
+struct objc_method_t {
+ uint32_t method_name; /* SEL, aka struct objc_selector * (32-bit pointer) */
+ uint32_t method_types; /* char * (32-bit pointer) */
+ uint32_t method_imp; /* IMP, aka function pointer, (*IMP)(id, SEL, ...)
+ (32-bit pointer) */
+};
+
+struct objc_protocol_list_t {
+ uint32_t next; /* struct objc_protocol_list * (32-bit pointer) */
+ int32_t count;
+ // uint32_t list[1]; /* Protocol *, aka struct objc_protocol_t *
+ // (32-bit pointer) */
+};
+
+struct objc_protocol_t {
+ uint32_t isa; /* struct objc_class * (32-bit pointer) */
+ uint32_t protocol_name; /* char * (32-bit pointer) */
+ uint32_t protocol_list; /* struct objc_protocol_list * (32-bit pointer) */
+ uint32_t instance_methods; /* struct objc_method_description_list *
+ (32-bit pointer) */
+ uint32_t class_methods; /* struct objc_method_description_list *
+ (32-bit pointer) */
+};
+
+struct objc_method_description_list_t {
+ int32_t count;
+ // struct objc_method_description_t list[1];
+};
+
+struct objc_method_description_t {
+ uint32_t name; /* SEL, aka struct objc_selector * (32-bit pointer) */
+ uint32_t types; /* char * (32-bit pointer) */
+};
+
inline void swapStruct(struct cfstring64_t &cfs) {
sys::swapByteOrder(cfs.isa);
sys::swapByteOrder(cfs.flags);
@@ -2520,6 +2850,14 @@ inline void swapStruct(struct class64_t &c) {
sys::swapByteOrder(c.data);
}
+inline void swapStruct(struct class32_t &c) {
+ sys::swapByteOrder(c.isa);
+ sys::swapByteOrder(c.superclass);
+ sys::swapByteOrder(c.cache);
+ sys::swapByteOrder(c.vtable);
+ sys::swapByteOrder(c.data);
+}
+
inline void swapStruct(struct class_ro64_t &cro) {
sys::swapByteOrder(cro.flags);
sys::swapByteOrder(cro.instanceStart);
@@ -2534,6 +2872,238 @@ inline void swapStruct(struct class_ro64_t &cro) {
sys::swapByteOrder(cro.baseProperties);
}
+inline void swapStruct(struct class_ro32_t &cro) {
+ sys::swapByteOrder(cro.flags);
+ sys::swapByteOrder(cro.instanceStart);
+ sys::swapByteOrder(cro.instanceSize);
+ sys::swapByteOrder(cro.ivarLayout);
+ sys::swapByteOrder(cro.name);
+ sys::swapByteOrder(cro.baseMethods);
+ sys::swapByteOrder(cro.baseProtocols);
+ sys::swapByteOrder(cro.ivars);
+ sys::swapByteOrder(cro.weakIvarLayout);
+ sys::swapByteOrder(cro.baseProperties);
+}
+
+inline void swapStruct(struct method_list64_t &ml) {
+ sys::swapByteOrder(ml.entsize);
+ sys::swapByteOrder(ml.count);
+}
+
+inline void swapStruct(struct method_list32_t &ml) {
+ sys::swapByteOrder(ml.entsize);
+ sys::swapByteOrder(ml.count);
+}
+
+inline void swapStruct(struct method64_t &m) {
+ sys::swapByteOrder(m.name);
+ sys::swapByteOrder(m.types);
+ sys::swapByteOrder(m.imp);
+}
+
+inline void swapStruct(struct method32_t &m) {
+ sys::swapByteOrder(m.name);
+ sys::swapByteOrder(m.types);
+ sys::swapByteOrder(m.imp);
+}
+
+inline void swapStruct(struct protocol_list64_t &pl) {
+ sys::swapByteOrder(pl.count);
+}
+
+inline void swapStruct(struct protocol_list32_t &pl) {
+ sys::swapByteOrder(pl.count);
+}
+
+inline void swapStruct(struct protocol64_t &p) {
+ sys::swapByteOrder(p.isa);
+ sys::swapByteOrder(p.name);
+ sys::swapByteOrder(p.protocols);
+ sys::swapByteOrder(p.instanceMethods);
+ sys::swapByteOrder(p.classMethods);
+ sys::swapByteOrder(p.optionalInstanceMethods);
+ sys::swapByteOrder(p.optionalClassMethods);
+ sys::swapByteOrder(p.instanceProperties);
+}
+
+inline void swapStruct(struct protocol32_t &p) {
+ sys::swapByteOrder(p.isa);
+ sys::swapByteOrder(p.name);
+ sys::swapByteOrder(p.protocols);
+ sys::swapByteOrder(p.instanceMethods);
+ sys::swapByteOrder(p.classMethods);
+ sys::swapByteOrder(p.optionalInstanceMethods);
+ sys::swapByteOrder(p.optionalClassMethods);
+ sys::swapByteOrder(p.instanceProperties);
+}
+
+inline void swapStruct(struct ivar_list64_t &il) {
+ sys::swapByteOrder(il.entsize);
+ sys::swapByteOrder(il.count);
+}
+
+inline void swapStruct(struct ivar_list32_t &il) {
+ sys::swapByteOrder(il.entsize);
+ sys::swapByteOrder(il.count);
+}
+
+inline void swapStruct(struct ivar64_t &i) {
+ sys::swapByteOrder(i.offset);
+ sys::swapByteOrder(i.name);
+ sys::swapByteOrder(i.type);
+ sys::swapByteOrder(i.alignment);
+ sys::swapByteOrder(i.size);
+}
+
+inline void swapStruct(struct ivar32_t &i) {
+ sys::swapByteOrder(i.offset);
+ sys::swapByteOrder(i.name);
+ sys::swapByteOrder(i.type);
+ sys::swapByteOrder(i.alignment);
+ sys::swapByteOrder(i.size);
+}
+
+inline void swapStruct(struct objc_property_list64 &pl) {
+ sys::swapByteOrder(pl.entsize);
+ sys::swapByteOrder(pl.count);
+}
+
+inline void swapStruct(struct objc_property_list32 &pl) {
+ sys::swapByteOrder(pl.entsize);
+ sys::swapByteOrder(pl.count);
+}
+
+inline void swapStruct(struct objc_property64 &op) {
+ sys::swapByteOrder(op.name);
+ sys::swapByteOrder(op.attributes);
+}
+
+inline void swapStruct(struct objc_property32 &op) {
+ sys::swapByteOrder(op.name);
+ sys::swapByteOrder(op.attributes);
+}
+
+inline void swapStruct(struct category64_t &c) {
+ sys::swapByteOrder(c.name);
+ sys::swapByteOrder(c.cls);
+ sys::swapByteOrder(c.instanceMethods);
+ sys::swapByteOrder(c.classMethods);
+ sys::swapByteOrder(c.protocols);
+ sys::swapByteOrder(c.instanceProperties);
+}
+
+inline void swapStruct(struct category32_t &c) {
+ sys::swapByteOrder(c.name);
+ sys::swapByteOrder(c.cls);
+ sys::swapByteOrder(c.instanceMethods);
+ sys::swapByteOrder(c.classMethods);
+ sys::swapByteOrder(c.protocols);
+ sys::swapByteOrder(c.instanceProperties);
+}
+
+inline void swapStruct(struct objc_image_info64 &o) {
+ sys::swapByteOrder(o.version);
+ sys::swapByteOrder(o.flags);
+}
+
+inline void swapStruct(struct objc_image_info32 &o) {
+ sys::swapByteOrder(o.version);
+ sys::swapByteOrder(o.flags);
+}
+
+inline void swapStruct(struct imageInfo_t &o) {
+ sys::swapByteOrder(o.version);
+ sys::swapByteOrder(o.flags);
+}
+
+inline void swapStruct(struct message_ref64 &mr) {
+ sys::swapByteOrder(mr.imp);
+ sys::swapByteOrder(mr.sel);
+}
+
+inline void swapStruct(struct message_ref32 &mr) {
+ sys::swapByteOrder(mr.imp);
+ sys::swapByteOrder(mr.sel);
+}
+
+inline void swapStruct(struct objc_module_t &module) {
+ sys::swapByteOrder(module.version);
+ sys::swapByteOrder(module.size);
+ sys::swapByteOrder(module.name);
+ sys::swapByteOrder(module.symtab);
+}
+
+inline void swapStruct(struct objc_symtab_t &symtab) {
+ sys::swapByteOrder(symtab.sel_ref_cnt);
+ sys::swapByteOrder(symtab.refs);
+ sys::swapByteOrder(symtab.cls_def_cnt);
+ sys::swapByteOrder(symtab.cat_def_cnt);
+}
+
+inline void swapStruct(struct objc_class_t &objc_class) {
+ sys::swapByteOrder(objc_class.isa);
+ sys::swapByteOrder(objc_class.super_class);
+ sys::swapByteOrder(objc_class.name);
+ sys::swapByteOrder(objc_class.version);
+ sys::swapByteOrder(objc_class.info);
+ sys::swapByteOrder(objc_class.instance_size);
+ sys::swapByteOrder(objc_class.ivars);
+ sys::swapByteOrder(objc_class.methodLists);
+ sys::swapByteOrder(objc_class.cache);
+ sys::swapByteOrder(objc_class.protocols);
+}
+
+inline void swapStruct(struct objc_category_t &objc_category) {
+ sys::swapByteOrder(objc_category.category_name);
+ sys::swapByteOrder(objc_category.class_name);
+ sys::swapByteOrder(objc_category.instance_methods);
+ sys::swapByteOrder(objc_category.class_methods);
+ sys::swapByteOrder(objc_category.protocols);
+}
+
+inline void swapStruct(struct objc_ivar_list_t &objc_ivar_list) {
+ sys::swapByteOrder(objc_ivar_list.ivar_count);
+}
+
+inline void swapStruct(struct objc_ivar_t &objc_ivar) {
+ sys::swapByteOrder(objc_ivar.ivar_name);
+ sys::swapByteOrder(objc_ivar.ivar_type);
+ sys::swapByteOrder(objc_ivar.ivar_offset);
+}
+
+inline void swapStruct(struct objc_method_list_t &method_list) {
+ sys::swapByteOrder(method_list.obsolete);
+ sys::swapByteOrder(method_list.method_count);
+}
+
+inline void swapStruct(struct objc_method_t &method) {
+ sys::swapByteOrder(method.method_name);
+ sys::swapByteOrder(method.method_types);
+ sys::swapByteOrder(method.method_imp);
+}
+
+inline void swapStruct(struct objc_protocol_list_t &protocol_list) {
+ sys::swapByteOrder(protocol_list.next);
+ sys::swapByteOrder(protocol_list.count);
+}
+
+inline void swapStruct(struct objc_protocol_t &protocol) {
+ sys::swapByteOrder(protocol.isa);
+ sys::swapByteOrder(protocol.protocol_name);
+ sys::swapByteOrder(protocol.protocol_list);
+ sys::swapByteOrder(protocol.instance_methods);
+ sys::swapByteOrder(protocol.class_methods);
+}
+
+inline void swapStruct(struct objc_method_description_list_t &mdl) {
+ sys::swapByteOrder(mdl.count);
+}
+
+inline void swapStruct(struct objc_method_description_t &md) {
+ sys::swapByteOrder(md.name);
+ sys::swapByteOrder(md.types);
+}
+
static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue,
struct DisassembleInfo *info);
@@ -2642,6 +3212,2411 @@ static uint64_t get_objc2_64bit_selref(uint64_t ReferenceValue,
return n_value;
}
+static const SectionRef get_section(MachOObjectFile *O, const char *segname,
+ const char *sectname) {
+ for (const SectionRef &Section : O->sections()) {
+ StringRef SectName;
+ Section.getName(SectName);
+ DataRefImpl Ref = Section.getRawDataRefImpl();
+ StringRef SegName = O->getSectionFinalSegmentName(Ref);
+ if (SegName == segname && SectName == sectname)
+ return Section;
+ }
+ return SectionRef();
+}
+
+static void
+walk_pointer_list_64(const char *listname, const SectionRef S,
+ MachOObjectFile *O, struct DisassembleInfo *info,
+ void (*func)(uint64_t, struct DisassembleInfo *info)) {
+ if (S == SectionRef())
+ return;
+
+ StringRef SectName;
+ S.getName(SectName);
+ DataRefImpl Ref = S.getRawDataRefImpl();
+ StringRef SegName = O->getSectionFinalSegmentName(Ref);
+ outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
+
+ StringRef BytesStr;
+ S.getContents(BytesStr);
+ const char *Contents = reinterpret_cast<const char *>(BytesStr.data());
+
+ for (uint32_t i = 0; i < S.getSize(); i += sizeof(uint64_t)) {
+ uint32_t left = S.getSize() - i;
+ uint32_t size = left < sizeof(uint64_t) ? left : sizeof(uint64_t);
+ uint64_t p = 0;
+ memcpy(&p, Contents + i, size);
+ if (i + sizeof(uint64_t) > S.getSize())
+ outs() << listname << " list pointer extends past end of (" << SegName
+ << "," << SectName << ") section\n";
+ outs() << format("%016" PRIx64, S.getAddress() + i) << " ";
+
+ if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(p);
+
+ uint64_t n_value = 0;
+ const char *name = get_symbol_64(i, S, info, n_value, p);
+ if (name == nullptr)
+ name = get_dyld_bind_info_symbolname(S.getAddress() + i, info);
+
+ if (n_value != 0) {
+ outs() << format("0x%" PRIx64, n_value);
+ if (p != 0)
+ outs() << " + " << format("0x%" PRIx64, p);
+ } else
+ outs() << format("0x%" PRIx64, p);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ p += n_value;
+ if (func)
+ func(p, info);
+ }
+}
+
+static void
+walk_pointer_list_32(const char *listname, const SectionRef S,
+ MachOObjectFile *O, struct DisassembleInfo *info,
+ void (*func)(uint32_t, struct DisassembleInfo *info)) {
+ if (S == SectionRef())
+ return;
+
+ StringRef SectName;
+ S.getName(SectName);
+ DataRefImpl Ref = S.getRawDataRefImpl();
+ StringRef SegName = O->getSectionFinalSegmentName(Ref);
+ outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
+
+ StringRef BytesStr;
+ S.getContents(BytesStr);
+ const char *Contents = reinterpret_cast<const char *>(BytesStr.data());
+
+ for (uint32_t i = 0; i < S.getSize(); i += sizeof(uint32_t)) {
+ uint32_t left = S.getSize() - i;
+ uint32_t size = left < sizeof(uint32_t) ? left : sizeof(uint32_t);
+ uint32_t p = 0;
+ memcpy(&p, Contents + i, size);
+ if (i + sizeof(uint32_t) > S.getSize())
+ outs() << listname << " list pointer extends past end of (" << SegName
+ << "," << SectName << ") section\n";
+ uint32_t Address = S.getAddress() + i;
+ outs() << format("%08" PRIx32, Address) << " ";
+
+ if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(p);
+ outs() << format("0x%" PRIx32, p);
+
+ const char *name = get_symbol_32(i, S, info, p);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ if (func)
+ func(p, info);
+ }
+}
+
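
The two walkers above treat a section as a flat array of pointers, print each entry (with its symbol, if one is known), and optionally hand the pointed-to address to a per-entry callback. A minimal usage sketch, assuming a populated DisassembleInfo named info; the section name is illustrative:

    const SectionRef CL = get_section(O, "__DATA", "__objc_classlist");
    // nullptr callback: just list the pointers; a real caller would pass a
    // printer for the pointed-to class structure.
    walk_pointer_list_64("class", CL, O, &info, nullptr);
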
+static void print_layout_map(const char *layout_map, uint32_t left) {
+ outs() << " layout map: ";
+ do {
+ outs() << format("0x%02" PRIx32, (*layout_map) & 0xff) << " ";
+ left--;
+ layout_map++;
+ } while (*layout_map != '\0' && left != 0);
+ outs() << "\n";
+}
+
+static void print_layout_map64(uint64_t p, struct DisassembleInfo *info) {
+ uint32_t offset, left;
+ SectionRef S;
+ const char *layout_map;
+
+ if (p == 0)
+ return;
+ layout_map = get_pointer_64(p, offset, left, S, info);
+ print_layout_map(layout_map, left);
+}
+
+static void print_layout_map32(uint32_t p, struct DisassembleInfo *info) {
+ uint32_t offset, left;
+ SectionRef S;
+ const char *layout_map;
+
+ if (p == 0)
+ return;
+ layout_map = get_pointer_32(p, offset, left, S, info);
+ print_layout_map(layout_map, left);
+}
+
+static void print_method_list64_t(uint64_t p, struct DisassembleInfo *info,
+ const char *indent) {
+ struct method_list64_t ml;
+ struct method64_t m;
+ const char *r;
+ uint32_t offset, xoffset, left, i;
+ SectionRef S, xS;
+ const char *name, *sym_name;
+ uint64_t n_value;
+
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&ml, '\0', sizeof(struct method_list64_t));
+ if (left < sizeof(struct method_list64_t)) {
+ memcpy(&ml, r, left);
+ outs() << " (method_list_t extends past the end of the section)\n";
+ } else
+ memcpy(&ml, r, sizeof(struct method_list64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(ml);
+ outs() << indent << "\t\t entsize " << ml.entsize << "\n";
+ outs() << indent << "\t\t count " << ml.count << "\n";
+
+ p += sizeof(struct method_list64_t);
+ offset += sizeof(struct method_list64_t);
+ for (i = 0; i < ml.count; i++) {
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&m, '\0', sizeof(struct method64_t));
+ if (left < sizeof(struct method64_t)) {
+ memcpy(&m, r, left);
+ outs() << indent << " (method_t extends past the end of the section)\n";
+ } else
+ memcpy(&m, r, sizeof(struct method64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(m);
+
+ outs() << indent << "\t\t name ";
+ sym_name = get_symbol_64(offset + offsetof(struct method64_t, name), S,
+ info, n_value, m.name);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (m.name != 0)
+ outs() << " + " << format("0x%" PRIx64, m.name);
+ } else
+ outs() << format("0x%" PRIx64, m.name);
+ name = get_pointer_64(m.name + n_value, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << indent << "\t\t types ";
+ sym_name = get_symbol_64(offset + offsetof(struct method64_t, types), S,
+ info, n_value, m.types);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (m.types != 0)
+ outs() << " + " << format("0x%" PRIx64, m.types);
+ } else
+ outs() << format("0x%" PRIx64, m.types);
+ name = get_pointer_64(m.types + n_value, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << indent << "\t\t imp ";
+ name = get_symbol_64(offset + offsetof(struct method64_t, imp), S, info,
+ n_value, m.imp);
+ if (info->verbose && name == nullptr) {
+ if (n_value != 0) {
+ outs() << format("0x%" PRIx64, n_value) << " ";
+ if (m.imp != 0)
+ outs() << "+ " << format("0x%" PRIx64, m.imp) << " ";
+ } else
+ outs() << format("0x%" PRIx64, m.imp) << " ";
+ }
+ if (name != nullptr)
+ outs() << name;
+ outs() << "\n";
+
+ p += sizeof(struct method64_t);
+ offset += sizeof(struct method64_t);
+ }
+}
+
+static void print_method_list32_t(uint64_t p, struct DisassembleInfo *info,
+ const char *indent) {
+ struct method_list32_t ml;
+ struct method32_t m;
+ const char *r, *name;
+ uint32_t offset, xoffset, left, i;
+ SectionRef S, xS;
+
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&ml, '\0', sizeof(struct method_list32_t));
+ if (left < sizeof(struct method_list32_t)) {
+ memcpy(&ml, r, left);
+ outs() << " (method_list_t extends past the end of the section)\n";
+ } else
+ memcpy(&ml, r, sizeof(struct method_list32_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(ml);
+ outs() << indent << "\t\t entsize " << ml.entsize << "\n";
+ outs() << indent << "\t\t count " << ml.count << "\n";
+
+ p += sizeof(struct method_list32_t);
+ offset += sizeof(struct method_list32_t);
+ for (i = 0; i < ml.count; i++) {
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&m, '\0', sizeof(struct method32_t));
+ if (left < sizeof(struct method32_t)) {
+ memcpy(&m, r, left);
+ outs() << indent << " (method_t extends past the end of the section)\n";
+ } else
+ memcpy(&m, r, sizeof(struct method32_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(m);
+
+ outs() << indent << "\t\t name " << format("0x%" PRIx32, m.name);
+ name = get_pointer_32(m.name, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << indent << "\t\t types " << format("0x%" PRIx32, m.types);
+ name = get_pointer_32(m.types, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << indent << "\t\t imp " << format("0x%" PRIx32, m.imp);
+ name = get_symbol_32(offset + offsetof(struct method32_t, imp), S, info,
+ m.imp);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ p += sizeof(struct method32_t);
+ offset += sizeof(struct method32_t);
+ }
+}
+
+static bool print_method_list(uint32_t p, struct DisassembleInfo *info) {
+ uint32_t offset, left, xleft;
+ SectionRef S;
+ struct objc_method_list_t method_list;
+ struct objc_method_t method;
+ const char *r, *methods, *name, *SymbolName;
+ int32_t i;
+
+ r = get_pointer_32(p, offset, left, S, info, true);
+ if (r == nullptr)
+ return true;
+
+ outs() << "\n";
+ if (left > sizeof(struct objc_method_list_t)) {
+ memcpy(&method_list, r, sizeof(struct objc_method_list_t));
+ } else {
+ outs() << "\t\t objc_method_list extends past end of the section\n";
+ memset(&method_list, '\0', sizeof(struct objc_method_list_t));
+ memcpy(&method_list, r, left);
+ }
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(method_list);
+
+ outs() << "\t\t obsolete "
+ << format("0x%08" PRIx32, method_list.obsolete) << "\n";
+ outs() << "\t\t method_count " << method_list.method_count << "\n";
+
+ methods = r + sizeof(struct objc_method_list_t);
+ for (i = 0; i < method_list.method_count; i++) {
+ if ((i + 1) * sizeof(struct objc_method_t) > left) {
+ outs() << "\t\t remaining methods extend past the end of the section\n";
+ break;
+ }
+ memcpy(&method, methods + i * sizeof(struct objc_method_t),
+ sizeof(struct objc_method_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(method);
+
+ outs() << "\t\t method_name "
+ << format("0x%08" PRIx32, method.method_name);
+ if (info->verbose) {
+ name = get_pointer_32(method.method_name, offset, xleft, S, info, true);
+ if (name != nullptr)
+ outs() << format(" %.*s", xleft, name);
+ else
+ outs() << " (not in an __OBJC section)";
+ }
+ outs() << "\n";
+
+ outs() << "\t\t method_types "
+ << format("0x%08" PRIx32, method.method_types);
+ if (info->verbose) {
+ name = get_pointer_32(method.method_types, offset, xleft, S, info, true);
+ if (name != nullptr)
+ outs() << format(" %.*s", xleft, name);
+ else
+ outs() << " (not in an __OBJC section)";
+ }
+ outs() << "\n";
+
+ outs() << "\t\t method_imp "
+ << format("0x%08" PRIx32, method.method_imp) << " ";
+ if (info->verbose) {
+ SymbolName = GuessSymbolName(method.method_imp, info->AddrMap);
+ if (SymbolName != nullptr)
+ outs() << SymbolName;
+ }
+ outs() << "\n";
+ }
+ return false;
+}
+
+static void print_protocol_list64_t(uint64_t p, struct DisassembleInfo *info) {
+ struct protocol_list64_t pl;
+ uint64_t q, n_value;
+ struct protocol64_t pc;
+ const char *r;
+ uint32_t offset, xoffset, left, i;
+ SectionRef S, xS;
+ const char *name, *sym_name;
+
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&pl, '\0', sizeof(struct protocol_list64_t));
+ if (left < sizeof(struct protocol_list64_t)) {
+ memcpy(&pl, r, left);
+ outs() << " (protocol_list_t extends past the end of the section)\n";
+ } else
+ memcpy(&pl, r, sizeof(struct protocol_list64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(pl);
+ outs() << " count " << pl.count << "\n";
+
+ p += sizeof(struct protocol_list64_t);
+ offset += sizeof(struct protocol_list64_t);
+ for (i = 0; i < pl.count; i++) {
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ q = 0;
+ if (left < sizeof(uint64_t)) {
+ memcpy(&q, r, left);
+ outs() << " (protocol_t * extends past the end of the section)\n";
+ } else
+ memcpy(&q, r, sizeof(uint64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(q);
+
+ outs() << "\t\t list[" << i << "] ";
+ sym_name = get_symbol_64(offset, S, info, n_value, q);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (q != 0)
+ outs() << " + " << format("0x%" PRIx64, q);
+ } else
+ outs() << format("0x%" PRIx64, q);
+ outs() << " (struct protocol_t *)\n";
+
+ r = get_pointer_64(q + n_value, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&pc, '\0', sizeof(struct protocol64_t));
+ if (left < sizeof(struct protocol64_t)) {
+ memcpy(&pc, r, left);
+ outs() << " (protocol_t extends past the end of the section)\n";
+ } else
+ memcpy(&pc, r, sizeof(struct protocol64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(pc);
+
+ outs() << "\t\t\t isa " << format("0x%" PRIx64, pc.isa) << "\n";
+
+ outs() << "\t\t\t name ";
+ sym_name = get_symbol_64(offset + offsetof(struct protocol64_t, name), S,
+ info, n_value, pc.name);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (pc.name != 0)
+ outs() << " + " << format("0x%" PRIx64, pc.name);
+ } else
+ outs() << format("0x%" PRIx64, pc.name);
+ name = get_pointer_64(pc.name + n_value, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << "\t\t\tprotocols " << format("0x%" PRIx64, pc.protocols) << "\n";
+
+ outs() << "\t\t instanceMethods ";
+ sym_name =
+ get_symbol_64(offset + offsetof(struct protocol64_t, instanceMethods),
+ S, info, n_value, pc.instanceMethods);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (pc.instanceMethods != 0)
+ outs() << " + " << format("0x%" PRIx64, pc.instanceMethods);
+ } else
+ outs() << format("0x%" PRIx64, pc.instanceMethods);
+ outs() << " (struct method_list_t *)\n";
+ if (pc.instanceMethods + n_value != 0)
+ print_method_list64_t(pc.instanceMethods + n_value, info, "\t");
+
+ outs() << "\t\t classMethods ";
+ sym_name =
+ get_symbol_64(offset + offsetof(struct protocol64_t, classMethods), S,
+ info, n_value, pc.classMethods);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (pc.classMethods != 0)
+ outs() << " + " << format("0x%" PRIx64, pc.classMethods);
+ } else
+ outs() << format("0x%" PRIx64, pc.classMethods);
+ outs() << " (struct method_list_t *)\n";
+ if (pc.classMethods + n_value != 0)
+ print_method_list64_t(pc.classMethods + n_value, info, "\t");
+
+ outs() << "\t optionalInstanceMethods "
+ << format("0x%" PRIx64, pc.optionalInstanceMethods) << "\n";
+ outs() << "\t optionalClassMethods "
+ << format("0x%" PRIx64, pc.optionalClassMethods) << "\n";
+ outs() << "\t instanceProperties "
+ << format("0x%" PRIx64, pc.instanceProperties) << "\n";
+
+ p += sizeof(uint64_t);
+ offset += sizeof(uint64_t);
+ }
+}
+
+static void print_protocol_list32_t(uint32_t p, struct DisassembleInfo *info) {
+ struct protocol_list32_t pl;
+ uint32_t q;
+ struct protocol32_t pc;
+ const char *r;
+ uint32_t offset, xoffset, left, i;
+ SectionRef S, xS;
+ const char *name;
+
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&pl, '\0', sizeof(struct protocol_list32_t));
+ if (left < sizeof(struct protocol_list32_t)) {
+ memcpy(&pl, r, left);
+ outs() << " (protocol_list_t extends past the end of the section)\n";
+ } else
+ memcpy(&pl, r, sizeof(struct protocol_list32_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(pl);
+ outs() << " count " << pl.count << "\n";
+
+ p += sizeof(struct protocol_list32_t);
+ offset += sizeof(struct protocol_list32_t);
+ for (i = 0; i < pl.count; i++) {
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ q = 0;
+ if (left < sizeof(uint32_t)) {
+ memcpy(&q, r, left);
+ outs() << " (protocol_t * extends past the end of the section)\n";
+ } else
+ memcpy(&q, r, sizeof(uint32_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(q);
+ outs() << "\t\t list[" << i << "] " << format("0x%" PRIx32, q)
+ << " (struct protocol_t *)\n";
+ r = get_pointer_32(q, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&pc, '\0', sizeof(struct protocol32_t));
+ if (left < sizeof(struct protocol32_t)) {
+ memcpy(&pc, r, left);
+ outs() << " (protocol_t extends past the end of the section)\n";
+ } else
+ memcpy(&pc, r, sizeof(struct protocol32_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(pc);
+ outs() << "\t\t\t isa " << format("0x%" PRIx32, pc.isa) << "\n";
+ outs() << "\t\t\t name " << format("0x%" PRIx32, pc.name);
+ name = get_pointer_32(pc.name, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+ outs() << "\t\t\tprotocols " << format("0x%" PRIx32, pc.protocols) << "\n";
+ outs() << "\t\t instanceMethods "
+ << format("0x%" PRIx32, pc.instanceMethods)
+ << " (struct method_list_t *)\n";
+ if (pc.instanceMethods != 0)
+ print_method_list32_t(pc.instanceMethods, info, "\t");
+ outs() << "\t\t classMethods " << format("0x%" PRIx32, pc.classMethods)
+ << " (struct method_list_t *)\n";
+ if (pc.classMethods != 0)
+ print_method_list32_t(pc.classMethods, info, "\t");
+ outs() << "\t optionalInstanceMethods "
+ << format("0x%" PRIx32, pc.optionalInstanceMethods) << "\n";
+ outs() << "\t optionalClassMethods "
+ << format("0x%" PRIx32, pc.optionalClassMethods) << "\n";
+ outs() << "\t instanceProperties "
+ << format("0x%" PRIx32, pc.instanceProperties) << "\n";
+ p += sizeof(uint32_t);
+ offset += sizeof(uint32_t);
+ }
+}
+
+static void print_indent(uint32_t indent) {
+ for (uint32_t i = 0; i < indent;) {
+ if (indent - i >= 8) {
+ outs() << "\t";
+ i += 8;
+ } else {
+ for (uint32_t j = i; j < indent; j++)
+ outs() << " ";
+ return;
+ }
+ }
+}
+
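
For reference, print_indent emits whole tab stops of eight columns and pads the remainder with spaces, e.g.:

    print_indent(12); // one '\t' (8 columns) followed by four spaces
    print_indent(3);  // three spaces
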
+static bool print_method_description_list(uint32_t p, uint32_t indent,
+ struct DisassembleInfo *info) {
+ uint32_t offset, left, xleft;
+ SectionRef S;
+ struct objc_method_description_list_t mdl;
+ struct objc_method_description_t md;
+ const char *r, *list, *name;
+ int32_t i;
+
+ r = get_pointer_32(p, offset, left, S, info, true);
+ if (r == nullptr)
+ return true;
+
+ outs() << "\n";
+ if (left > sizeof(struct objc_method_description_list_t)) {
+ memcpy(&mdl, r, sizeof(struct objc_method_description_list_t));
+ } else {
+ print_indent(indent);
+ outs() << " objc_method_description_list extends past end of the section\n";
+ memset(&mdl, '\0', sizeof(struct objc_method_description_list_t));
+ memcpy(&mdl, r, left);
+ }
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(mdl);
+
+ print_indent(indent);
+ outs() << " count " << mdl.count << "\n";
+
+ list = r + sizeof(struct objc_method_description_list_t);
+ for (i = 0; i < mdl.count; i++) {
+ if ((i + 1) * sizeof(struct objc_method_description_t) > left) {
+ print_indent(indent);
+ outs() << " remaining list entries extend past the end of the section\n";
+ break;
+ }
+ print_indent(indent);
+ outs() << " list[" << i << "]\n";
+ memcpy(&md, list + i * sizeof(struct objc_method_description_t),
+ sizeof(struct objc_method_description_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(md);
+
+ print_indent(indent);
+ outs() << " name " << format("0x%08" PRIx32, md.name);
+ if (info->verbose) {
+ name = get_pointer_32(md.name, offset, xleft, S, info, true);
+ if (name != nullptr)
+ outs() << format(" %.*s", xleft, name);
+ else
+ outs() << " (not in an __OBJC section)";
+ }
+ outs() << "\n";
+
+ print_indent(indent);
+ outs() << " types " << format("0x%08" PRIx32, md.types);
+ if (info->verbose) {
+ name = get_pointer_32(md.types, offset, xleft, S, info, true);
+ if (name != nullptr)
+ outs() << format(" %.*s", xleft, name);
+ else
+ outs() << " (not in an __OBJC section)";
+ }
+ outs() << "\n";
+ }
+ return false;
+}
+
+static bool print_protocol_list(uint32_t p, uint32_t indent,
+ struct DisassembleInfo *info);
+
+static bool print_protocol(uint32_t p, uint32_t indent,
+ struct DisassembleInfo *info) {
+ uint32_t offset, left;
+ SectionRef S;
+ struct objc_protocol_t protocol;
+ const char *r, *name;
+
+ r = get_pointer_32(p, offset, left, S, info, true);
+ if (r == nullptr)
+ return true;
+
+ outs() << "\n";
+ if (left >= sizeof(struct objc_protocol_t)) {
+ memcpy(&protocol, r, sizeof(struct objc_protocol_t));
+ } else {
+ print_indent(indent);
+ outs() << " Protocol extends past end of the section\n";
+ memset(&protocol, '\0', sizeof(struct objc_protocol_t));
+ memcpy(&protocol, r, left);
+ }
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(protocol);
+
+ print_indent(indent);
+ outs() << " isa " << format("0x%08" PRIx32, protocol.isa)
+ << "\n";
+
+ print_indent(indent);
+ outs() << " protocol_name "
+ << format("0x%08" PRIx32, protocol.protocol_name);
+ if (info->verbose) {
+ name = get_pointer_32(protocol.protocol_name, offset, left, S, info, true);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ else
+ outs() << " (not in an __OBJC section)";
+ }
+ outs() << "\n";
+
+ print_indent(indent);
+ outs() << " protocol_list "
+ << format("0x%08" PRIx32, protocol.protocol_list);
+ if (print_protocol_list(protocol.protocol_list, indent + 4, info))
+ outs() << " (not in an __OBJC section)\n";
+
+ print_indent(indent);
+ outs() << " instance_methods "
+ << format("0x%08" PRIx32, protocol.instance_methods);
+ if (print_method_description_list(protocol.instance_methods, indent, info))
+ outs() << " (not in an __OBJC section)\n";
+
+ print_indent(indent);
+ outs() << " class_methods "
+ << format("0x%08" PRIx32, protocol.class_methods);
+ if (print_method_description_list(protocol.class_methods, indent, info))
+ outs() << " (not in an __OBJC section)\n";
+
+ return false;
+}
+
+static bool print_protocol_list(uint32_t p, uint32_t indent,
+ struct DisassembleInfo *info) {
+ uint32_t offset, left, l;
+ SectionRef S;
+ struct objc_protocol_list_t protocol_list;
+ const char *r, *list;
+ int32_t i;
+
+ r = get_pointer_32(p, offset, left, S, info, true);
+ if (r == nullptr)
+ return true;
+
+ outs() << "\n";
+ if (left > sizeof(struct objc_protocol_list_t)) {
+ memcpy(&protocol_list, r, sizeof(struct objc_protocol_list_t));
+ } else {
+ outs() << "\t\t objc_protocol_list_t extends past end of the section\n";
+ memset(&protocol_list, '\0', sizeof(struct objc_protocol_list_t));
+ memcpy(&protocol_list, r, left);
+ }
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(protocol_list);
+
+ print_indent(indent);
+ outs() << " next " << format("0x%08" PRIx32, protocol_list.next)
+ << "\n";
+ print_indent(indent);
+ outs() << " count " << protocol_list.count << "\n";
+
+ list = r + sizeof(struct objc_protocol_list_t);
+ for (i = 0; i < protocol_list.count; i++) {
+ if ((i + 1) * sizeof(uint32_t) > left) {
+ outs() << "\t\t remaining list entries extend past the end of the section\n";
+ break;
+ }
+ memcpy(&l, list + i * sizeof(uint32_t), sizeof(uint32_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(l);
+
+ print_indent(indent);
+ outs() << " list[" << i << "] " << format("0x%08" PRIx32, l);
+ if (print_protocol(l, indent, info))
+ outs() << "(not in an __OBJC section)\n";
+ }
+ return false;
+}
+
+static void print_ivar_list64_t(uint64_t p, struct DisassembleInfo *info) {
+ struct ivar_list64_t il;
+ struct ivar64_t i;
+ const char *r;
+ uint32_t offset, xoffset, left, j;
+ SectionRef S, xS;
+ const char *name, *sym_name, *ivar_offset_p;
+ uint64_t ivar_offset, n_value;
+
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&il, '\0', sizeof(struct ivar_list64_t));
+ if (left < sizeof(struct ivar_list64_t)) {
+ memcpy(&il, r, left);
+ outs() << " (ivar_list_t extends past the end of the section)\n";
+ } else
+ memcpy(&il, r, sizeof(struct ivar_list64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(il);
+ outs() << " entsize " << il.entsize << "\n";
+ outs() << " count " << il.count << "\n";
+
+ p += sizeof(struct ivar_list64_t);
+ offset += sizeof(struct ivar_list64_t);
+ for (j = 0; j < il.count; j++) {
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&i, '\0', sizeof(struct ivar64_t));
+ if (left < sizeof(struct ivar64_t)) {
+ memcpy(&i, r, left);
+ outs() << " (ivar_t extends past the end of the section)\n";
+ } else
+ memcpy(&i, r, sizeof(struct ivar64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(i);
+
+ outs() << "\t\t\t offset ";
+ sym_name = get_symbol_64(offset + offsetof(struct ivar64_t, offset), S,
+ info, n_value, i.offset);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (i.offset != 0)
+ outs() << " + " << format("0x%" PRIx64, i.offset);
+ } else
+ outs() << format("0x%" PRIx64, i.offset);
+ ivar_offset_p = get_pointer_64(i.offset + n_value, xoffset, left, xS, info);
+ if (ivar_offset_p != nullptr && left >= sizeof(ivar_offset)) {
+ memcpy(&ivar_offset, ivar_offset_p, sizeof(ivar_offset));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(ivar_offset);
+ outs() << " " << ivar_offset << "\n";
+ } else
+ outs() << "\n";
+
+ outs() << "\t\t\t name ";
+ sym_name = get_symbol_64(offset + offsetof(struct ivar64_t, name), S, info,
+ n_value, i.name);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (i.name != 0)
+ outs() << " + " << format("0x%" PRIx64, i.name);
+ } else
+ outs() << format("0x%" PRIx64, i.name);
+ name = get_pointer_64(i.name + n_value, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << "\t\t\t type ";
+ sym_name = get_symbol_64(offset + offsetof(struct ivar64_t, type), S, info,
+ n_value, i.type);
+ name = get_pointer_64(i.type + n_value, xoffset, left, xS, info);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (i.type != 0)
+ outs() << " + " << format("0x%" PRIx64, i.type);
+ } else
+ outs() << format("0x%" PRIx64, i.type);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << "\t\t\talignment " << i.alignment << "\n";
+ outs() << "\t\t\t size " << i.size << "\n";
+
+ p += sizeof(struct ivar64_t);
+ offset += sizeof(struct ivar64_t);
+ }
+}
+
+static void print_ivar_list32_t(uint32_t p, struct DisassembleInfo *info) {
+ struct ivar_list32_t il;
+ struct ivar32_t i;
+ const char *r;
+ uint32_t offset, xoffset, left, j;
+ SectionRef S, xS;
+ const char *name, *ivar_offset_p;
+ uint32_t ivar_offset;
+
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&il, '\0', sizeof(struct ivar_list32_t));
+ if (left < sizeof(struct ivar_list32_t)) {
+ memcpy(&il, r, left);
+ outs() << " (ivar_list_t extends past the end of the section)\n";
+ } else
+ memcpy(&il, r, sizeof(struct ivar_list32_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(il);
+ outs() << " entsize " << il.entsize << "\n";
+ outs() << " count " << il.count << "\n";
+
+ p += sizeof(struct ivar_list32_t);
+ offset += sizeof(struct ivar_list32_t);
+ for (j = 0; j < il.count; j++) {
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&i, '\0', sizeof(struct ivar32_t));
+ if (left < sizeof(struct ivar32_t)) {
+ memcpy(&i, r, left);
+ outs() << " (ivar_t extends past the end of the section)\n";
+ } else
+ memcpy(&i, r, sizeof(struct ivar32_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(i);
+
+ outs() << "\t\t\t offset " << format("0x%" PRIx32, i.offset);
+ ivar_offset_p = get_pointer_32(i.offset, xoffset, left, xS, info);
+ if (ivar_offset_p != nullptr && left >= sizeof(*ivar_offset_p)) {
+ memcpy(&ivar_offset, ivar_offset_p, sizeof(ivar_offset));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(ivar_offset);
+ outs() << " " << ivar_offset << "\n";
+ } else
+ outs() << "\n";
+
+ outs() << "\t\t\t name " << format("0x%" PRIx32, i.name);
+ name = get_pointer_32(i.name, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << "\t\t\t type " << format("0x%" PRIx32, i.type);
+ name = get_pointer_32(i.type, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << "\t\t\talignment " << i.alignment << "\n";
+ outs() << "\t\t\t size " << i.size << "\n";
+
+ p += sizeof(struct ivar32_t);
+ offset += sizeof(struct ivar32_t);
+ }
+}
+
+static void print_objc_property_list64(uint64_t p,
+ struct DisassembleInfo *info) {
+ struct objc_property_list64 opl;
+ struct objc_property64 op;
+ const char *r;
+ uint32_t offset, xoffset, left, j;
+ SectionRef S, xS;
+ const char *name, *sym_name;
+ uint64_t n_value;
+
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&opl, '\0', sizeof(struct objc_property_list64));
+ if (left < sizeof(struct objc_property_list64)) {
+ memcpy(&opl, r, left);
+ outs() << " (objc_property_list entends past the end of the section)\n";
+ } else
+ memcpy(&opl, r, sizeof(struct objc_property_list64));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(opl);
+ outs() << " entsize " << opl.entsize << "\n";
+ outs() << " count " << opl.count << "\n";
+
+ p += sizeof(struct objc_property_list64);
+ offset += sizeof(struct objc_property_list64);
+ for (j = 0; j < opl.count; j++) {
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&op, '\0', sizeof(struct objc_property64));
+ if (left < sizeof(struct objc_property64)) {
+ memcpy(&op, r, left);
+ outs() << " (objc_property entends past the end of the section)\n";
+ } else
+ memcpy(&op, r, sizeof(struct objc_property64));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(op);
+
+ outs() << "\t\t\t name ";
+ sym_name = get_symbol_64(offset + offsetof(struct objc_property64, name), S,
+ info, n_value, op.name);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (op.name != 0)
+ outs() << " + " << format("0x%" PRIx64, op.name);
+ } else
+ outs() << format("0x%" PRIx64, op.name);
+ name = get_pointer_64(op.name + n_value, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << "\t\t\tattributes ";
+ sym_name =
+ get_symbol_64(offset + offsetof(struct objc_property64, attributes), S,
+ info, n_value, op.attributes);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (op.attributes != 0)
+ outs() << " + " << format("0x%" PRIx64, op.attributes);
+ } else
+ outs() << format("0x%" PRIx64, op.attributes);
+ name = get_pointer_64(op.attributes + n_value, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ p += sizeof(struct objc_property64);
+ offset += sizeof(struct objc_property64);
+ }
+}
+
+static void print_objc_property_list32(uint32_t p,
+ struct DisassembleInfo *info) {
+ struct objc_property_list32 opl;
+ struct objc_property32 op;
+ const char *r;
+ uint32_t offset, xoffset, left, j;
+ SectionRef S, xS;
+ const char *name;
+
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&opl, '\0', sizeof(struct objc_property_list32));
+ if (left < sizeof(struct objc_property_list32)) {
+ memcpy(&opl, r, left);
+ outs() << " (objc_property_list entends past the end of the section)\n";
+ } else
+ memcpy(&opl, r, sizeof(struct objc_property_list32));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(opl);
+ outs() << " entsize " << opl.entsize << "\n";
+ outs() << " count " << opl.count << "\n";
+
+ p += sizeof(struct objc_property_list32);
+ offset += sizeof(struct objc_property_list32);
+ for (j = 0; j < opl.count; j++) {
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&op, '\0', sizeof(struct objc_property32));
+ if (left < sizeof(struct objc_property32)) {
+ memcpy(&op, r, left);
+ outs() << " (objc_property entends past the end of the section)\n";
+ } else
+ memcpy(&op, r, sizeof(struct objc_property32));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(op);
+
+ outs() << "\t\t\t name " << format("0x%" PRIx32, op.name);
+ name = get_pointer_32(op.name, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << "\t\t\tattributes " << format("0x%" PRIx32, op.attributes);
+ name = get_pointer_32(op.attributes, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ p += sizeof(struct objc_property32);
+ offset += sizeof(struct objc_property32);
+ }
+}
+
+static void print_class_ro64_t(uint64_t p, struct DisassembleInfo *info,
+ bool &is_meta_class) {
+ struct class_ro64_t cro;
+ const char *r;
+ uint32_t offset, xoffset, left;
+ SectionRef S, xS;
+ const char *name, *sym_name;
+ uint64_t n_value;
+
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr || left < sizeof(struct class_ro64_t))
+ return;
+ memset(&cro, '\0', sizeof(struct class_ro64_t));
+ if (left < sizeof(struct class_ro64_t)) {
+ memcpy(&cro, r, left);
+ outs() << " (class_ro_t entends past the end of the section)\n";
+ } else
+ memcpy(&cro, r, sizeof(struct class_ro64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(cro);
+ outs() << " flags " << format("0x%" PRIx32, cro.flags);
+ if (cro.flags & RO_META)
+ outs() << " RO_META";
+ if (cro.flags & RO_ROOT)
+ outs() << " RO_ROOT";
+ if (cro.flags & RO_HAS_CXX_STRUCTORS)
+ outs() << " RO_HAS_CXX_STRUCTORS";
+ outs() << "\n";
+ outs() << " instanceStart " << cro.instanceStart << "\n";
+ outs() << " instanceSize " << cro.instanceSize << "\n";
+ outs() << " reserved " << format("0x%" PRIx32, cro.reserved)
+ << "\n";
+ outs() << " ivarLayout " << format("0x%" PRIx64, cro.ivarLayout)
+ << "\n";
+ print_layout_map64(cro.ivarLayout, info);
+
+ outs() << " name ";
+ sym_name = get_symbol_64(offset + offsetof(struct class_ro64_t, name), S,
+ info, n_value, cro.name);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (cro.name != 0)
+ outs() << " + " << format("0x%" PRIx64, cro.name);
+ } else
+ outs() << format("0x%" PRIx64, cro.name);
+ name = get_pointer_64(cro.name + n_value, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << " baseMethods ";
+ sym_name = get_symbol_64(offset + offsetof(struct class_ro64_t, baseMethods),
+ S, info, n_value, cro.baseMethods);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (cro.baseMethods != 0)
+ outs() << " + " << format("0x%" PRIx64, cro.baseMethods);
+ } else
+ outs() << format("0x%" PRIx64, cro.baseMethods);
+ outs() << " (struct method_list_t *)\n";
+ if (cro.baseMethods + n_value != 0)
+ print_method_list64_t(cro.baseMethods + n_value, info, "");
+
+ outs() << " baseProtocols ";
+ sym_name =
+ get_symbol_64(offset + offsetof(struct class_ro64_t, baseProtocols), S,
+ info, n_value, cro.baseProtocols);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (cro.baseProtocols != 0)
+ outs() << " + " << format("0x%" PRIx64, cro.baseProtocols);
+ } else
+ outs() << format("0x%" PRIx64, cro.baseProtocols);
+ outs() << "\n";
+ if (cro.baseProtocols + n_value != 0)
+ print_protocol_list64_t(cro.baseProtocols + n_value, info);
+
+ outs() << " ivars ";
+ sym_name = get_symbol_64(offset + offsetof(struct class_ro64_t, ivars), S,
+ info, n_value, cro.ivars);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (cro.ivars != 0)
+ outs() << " + " << format("0x%" PRIx64, cro.ivars);
+ } else
+ outs() << format("0x%" PRIx64, cro.ivars);
+ outs() << "\n";
+ if (cro.ivars + n_value != 0)
+ print_ivar_list64_t(cro.ivars + n_value, info);
+
+ outs() << " weakIvarLayout ";
+ sym_name =
+ get_symbol_64(offset + offsetof(struct class_ro64_t, weakIvarLayout), S,
+ info, n_value, cro.weakIvarLayout);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (cro.weakIvarLayout != 0)
+ outs() << " + " << format("0x%" PRIx64, cro.weakIvarLayout);
+ } else
+ outs() << format("0x%" PRIx64, cro.weakIvarLayout);
+ outs() << "\n";
+ print_layout_map64(cro.weakIvarLayout + n_value, info);
+
+ outs() << " baseProperties ";
+ sym_name =
+ get_symbol_64(offset + offsetof(struct class_ro64_t, baseProperties), S,
+ info, n_value, cro.baseProperties);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (cro.baseProperties != 0)
+ outs() << " + " << format("0x%" PRIx64, cro.baseProperties);
+ } else
+ outs() << format("0x%" PRIx64, cro.baseProperties);
+ outs() << "\n";
+ if (cro.baseProperties + n_value != 0)
+ print_objc_property_list64(cro.baseProperties + n_value, info);
+
+ is_meta_class = (cro.flags & RO_META) ? true : false;
+}
+
+static void print_class_ro32_t(uint32_t p, struct DisassembleInfo *info,
+ bool &is_meta_class) {
+ struct class_ro32_t cro;
+ const char *r;
+ uint32_t offset, xoffset, left;
+ SectionRef S, xS;
+ const char *name;
+
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&cro, '\0', sizeof(struct class_ro32_t));
+ if (left < sizeof(struct class_ro32_t)) {
+ memcpy(&cro, r, left);
+ outs() << " (class_ro_t entends past the end of the section)\n";
+ } else
+ memcpy(&cro, r, sizeof(struct class_ro32_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(cro);
+ outs() << " flags " << format("0x%" PRIx32, cro.flags);
+ if (cro.flags & RO_META)
+ outs() << " RO_META";
+ if (cro.flags & RO_ROOT)
+ outs() << " RO_ROOT";
+ if (cro.flags & RO_HAS_CXX_STRUCTORS)
+ outs() << " RO_HAS_CXX_STRUCTORS";
+ outs() << "\n";
+ outs() << " instanceStart " << cro.instanceStart << "\n";
+ outs() << " instanceSize " << cro.instanceSize << "\n";
+ outs() << " ivarLayout " << format("0x%" PRIx32, cro.ivarLayout)
+ << "\n";
+ print_layout_map32(cro.ivarLayout, info);
+
+ outs() << " name " << format("0x%" PRIx32, cro.name);
+ name = get_pointer_32(cro.name, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << " baseMethods "
+ << format("0x%" PRIx32, cro.baseMethods)
+ << " (struct method_list_t *)\n";
+ if (cro.baseMethods != 0)
+ print_method_list32_t(cro.baseMethods, info, "");
+
+ outs() << " baseProtocols "
+ << format("0x%" PRIx32, cro.baseProtocols) << "\n";
+ if (cro.baseProtocols != 0)
+ print_protocol_list32_t(cro.baseProtocols, info);
+ outs() << " ivars " << format("0x%" PRIx32, cro.ivars)
+ << "\n";
+ if (cro.ivars != 0)
+ print_ivar_list32_t(cro.ivars, info);
+ outs() << " weakIvarLayout "
+ << format("0x%" PRIx32, cro.weakIvarLayout) << "\n";
+ print_layout_map32(cro.weakIvarLayout, info);
+ outs() << " baseProperties "
+ << format("0x%" PRIx32, cro.baseProperties) << "\n";
+ if (cro.baseProperties != 0)
+ print_objc_property_list32(cro.baseProperties, info);
+ is_meta_class = (cro.flags & RO_META) ? true : false;
+}
+
+static void print_class64_t(uint64_t p, struct DisassembleInfo *info) {
+ struct class64_t c;
+ const char *r;
+ uint32_t offset, left;
+ SectionRef S;
+ const char *name;
+ uint64_t isa_n_value, n_value;
+
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr || left < sizeof(struct class64_t))
+ return;
+ memset(&c, '\0', sizeof(struct class64_t));
+ if (left < sizeof(struct class64_t)) {
+ memcpy(&c, r, left);
+ outs() << " (class_t entends past the end of the section)\n";
+ } else
+ memcpy(&c, r, sizeof(struct class64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(c);
+
+ outs() << " isa " << format("0x%" PRIx64, c.isa);
+ name = get_symbol_64(offset + offsetof(struct class64_t, isa), S, info,
+ isa_n_value, c.isa);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ outs() << " superclass " << format("0x%" PRIx64, c.superclass);
+ name = get_symbol_64(offset + offsetof(struct class64_t, superclass), S, info,
+ n_value, c.superclass);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ outs() << " cache " << format("0x%" PRIx64, c.cache);
+ name = get_symbol_64(offset + offsetof(struct class64_t, cache), S, info,
+ n_value, c.cache);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ outs() << " vtable " << format("0x%" PRIx64, c.vtable);
+ name = get_symbol_64(offset + offsetof(struct class64_t, vtable), S, info,
+ n_value, c.vtable);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ name = get_symbol_64(offset + offsetof(struct class64_t, data), S, info,
+ n_value, c.data);
+ outs() << " data ";
+ if (n_value != 0) {
+ if (info->verbose && name != nullptr)
+ outs() << name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (c.data != 0)
+ outs() << " + " << format("0x%" PRIx64, c.data);
+ } else
+ outs() << format("0x%" PRIx64, c.data);
+ outs() << " (struct class_ro_t *)";
+
+ // This is a Swift class if some of the low bits of the pointer are set.
+ if ((c.data + n_value) & 0x7)
+ outs() << " Swift class";
+ outs() << "\n";
+ bool is_meta_class;
+ print_class_ro64_t((c.data + n_value) & ~0x7, info, is_meta_class);
+
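+ // If this was the class itself (not its metaclass), the isa pointer names
+ // the metaclass; follow it and print that as well.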
+ if (is_meta_class == false) {
+ outs() << "Meta Class\n";
+ print_class64_t(c.isa + isa_n_value, info);
+ }
+}
+
+static void print_class32_t(uint32_t p, struct DisassembleInfo *info) {
+ struct class32_t c;
+ const char *r;
+ uint32_t offset, left;
+ SectionRef S;
+ const char *name;
+
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&c, '\0', sizeof(struct class32_t));
+ if (left < sizeof(struct class32_t)) {
+ memcpy(&c, r, left);
+ outs() << " (class_t entends past the end of the section)\n";
+ } else
+ memcpy(&c, r, sizeof(struct class32_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(c);
+
+ outs() << " isa " << format("0x%" PRIx32, c.isa);
+ name =
+ get_symbol_32(offset + offsetof(struct class32_t, isa), S, info, c.isa);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ outs() << " superclass " << format("0x%" PRIx32, c.superclass);
+ name = get_symbol_32(offset + offsetof(struct class32_t, superclass), S, info,
+ c.superclass);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ outs() << " cache " << format("0x%" PRIx32, c.cache);
+ name = get_symbol_32(offset + offsetof(struct class32_t, cache), S, info,
+ c.cache);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ outs() << " vtable " << format("0x%" PRIx32, c.vtable);
+ name = get_symbol_32(offset + offsetof(struct class32_t, vtable), S, info,
+ c.vtable);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ name =
+ get_symbol_32(offset + offsetof(struct class32_t, data), S, info, c.data);
+ outs() << " data " << format("0x%" PRIx32, c.data)
+ << " (struct class_ro_t *)";
+
+ // This is a Swift class if some of the low bits of the pointer are set.
+ if (c.data & 0x3)
+ outs() << " Swift class";
+ outs() << "\n";
+ bool is_meta_class;
+ print_class_ro32_t(c.data & ~0x3, info, is_meta_class);
+
+ if (is_meta_class == false) {
+ outs() << "Meta Class\n";
+ print_class32_t(c.isa, info);
+ }
+}
+
+static void print_objc_class_t(struct objc_class_t *objc_class,
+ struct DisassembleInfo *info) {
+ uint32_t offset, left, xleft;
+ const char *name, *p, *ivar_list;
+ SectionRef S;
+ int32_t i;
+ struct objc_ivar_list_t objc_ivar_list;
+ struct objc_ivar_t ivar;
+
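+ // This is the legacy (Objective-C 1) objc_class structure read from an
+ // __OBJC section; its ivar list is read and printed inline below.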
+ outs() << "\t\t isa " << format("0x%08" PRIx32, objc_class->isa);
+ if (info->verbose && CLS_GETINFO(objc_class, CLS_META)) {
+ name = get_pointer_32(objc_class->isa, offset, left, S, info, true);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ else
+ outs() << " (not in an __OBJC section)";
+ }
+ outs() << "\n";
+
+ outs() << "\t super_class "
+ << format("0x%08" PRIx32, objc_class->super_class);
+ if (info->verbose) {
+ name = get_pointer_32(objc_class->super_class, offset, left, S, info, true);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ else
+ outs() << " (not in an __OBJC section)";
+ }
+ outs() << "\n";
+
+ outs() << "\t\t name " << format("0x%08" PRIx32, objc_class->name);
+ if (info->verbose) {
+ name = get_pointer_32(objc_class->name, offset, left, S, info, true);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ else
+ outs() << " (not in an __OBJC section)";
+ }
+ outs() << "\n";
+
+ outs() << "\t\t version " << format("0x%08" PRIx32, objc_class->version)
+ << "\n";
+
+ outs() << "\t\t info " << format("0x%08" PRIx32, objc_class->info);
+ if (info->verbose) {
+ if (CLS_GETINFO(objc_class, CLS_CLASS))
+ outs() << " CLS_CLASS";
+ else if (CLS_GETINFO(objc_class, CLS_META))
+ outs() << " CLS_META";
+ }
+ outs() << "\n";
+
+ outs() << "\t instance_size "
+ << format("0x%08" PRIx32, objc_class->instance_size) << "\n";
+
+ p = get_pointer_32(objc_class->ivars, offset, left, S, info, true);
+ outs() << "\t\t ivars " << format("0x%08" PRIx32, objc_class->ivars);
+ if (p != nullptr) {
+ if (left > sizeof(struct objc_ivar_list_t)) {
+ outs() << "\n";
+ memcpy(&objc_ivar_list, p, sizeof(struct objc_ivar_list_t));
+ } else {
+ outs() << " (entends past the end of the section)\n";
+ memset(&objc_ivar_list, '\0', sizeof(struct objc_ivar_list_t));
+ memcpy(&objc_ivar_list, p, left);
+ }
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(objc_ivar_list);
+ outs() << "\t\t ivar_count " << objc_ivar_list.ivar_count << "\n";
+ ivar_list = p + sizeof(struct objc_ivar_list_t);
+ for (i = 0; i < objc_ivar_list.ivar_count; i++) {
+ if ((i + 1) * sizeof(struct objc_ivar_t) > left) {
+ outs() << "\t\t remaining ivar's extend past the of the section\n";
+ break;
+ }
+ memcpy(&ivar, ivar_list + i * sizeof(struct objc_ivar_t),
+ sizeof(struct objc_ivar_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(ivar);
+
+ outs() << "\t\t\tivar_name " << format("0x%08" PRIx32, ivar.ivar_name);
+ if (info->verbose) {
+ name = get_pointer_32(ivar.ivar_name, offset, xleft, S, info, true);
+ if (name != nullptr)
+ outs() << format(" %.*s", xleft, name);
+ else
+ outs() << " (not in an __OBJC section)";
+ }
+ outs() << "\n";
+
+ outs() << "\t\t\tivar_type " << format("0x%08" PRIx32, ivar.ivar_type);
+ if (info->verbose) {
+ name = get_pointer_32(ivar.ivar_type, offset, xleft, S, info, true);
+ if (name != nullptr)
+ outs() << format(" %.*s", xleft, name);
+ else
+ outs() << " (not in an __OBJC section)";
+ }
+ outs() << "\n";
+
+ outs() << "\t\t ivar_offset "
+ << format("0x%08" PRIx32, ivar.ivar_offset) << "\n";
+ }
+ } else {
+ outs() << " (not in an __OBJC section)\n";
+ }
+
+ outs() << "\t\t methods " << format("0x%08" PRIx32, objc_class->methodLists);
+ if (print_method_list(objc_class->methodLists, info))
+ outs() << " (not in an __OBJC section)\n";
+
+ outs() << "\t\t cache " << format("0x%08" PRIx32, objc_class->cache)
+ << "\n";
+
+ outs() << "\t\tprotocols " << format("0x%08" PRIx32, objc_class->protocols);
+ if (print_protocol_list(objc_class->protocols, 16, info))
+ outs() << " (not in an __OBJC section)\n";
+}
+
+static void print_objc_objc_category_t(struct objc_category_t *objc_category,
+ struct DisassembleInfo *info) {
+ uint32_t offset, left;
+ const char *name;
+ SectionRef S;
+
+ outs() << "\t category name "
+ << format("0x%08" PRIx32, objc_category->category_name);
+ if (info->verbose) {
+ name = get_pointer_32(objc_category->category_name, offset, left, S, info,
+ true);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ else
+ outs() << " (not in an __OBJC section)";
+ }
+ outs() << "\n";
+
+ outs() << "\t\t class name "
+ << format("0x%08" PRIx32, objc_category->class_name);
+ if (info->verbose) {
+ name =
+ get_pointer_32(objc_category->class_name, offset, left, S, info, true);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ else
+ outs() << " (not in an __OBJC section)";
+ }
+ outs() << "\n";
+
+ outs() << "\t instance methods "
+ << format("0x%08" PRIx32, objc_category->instance_methods);
+ if (print_method_list(objc_category->instance_methods, info))
+ outs() << " (not in an __OBJC section)\n";
+
+ outs() << "\t class methods "
+ << format("0x%08" PRIx32, objc_category->class_methods);
+ if (print_method_list(objc_category->class_methods, info))
+ outs() << " (not in an __OBJC section)\n";
+}
+
+static void print_category64_t(uint64_t p, struct DisassembleInfo *info) {
+ struct category64_t c;
+ const char *r;
+ uint32_t offset, xoffset, left;
+ SectionRef S, xS;
+ const char *name, *sym_name;
+ uint64_t n_value;
+
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&c, '\0', sizeof(struct category64_t));
+ if (left < sizeof(struct category64_t)) {
+ memcpy(&c, r, left);
+ outs() << " (category_t entends past the end of the section)\n";
+ } else
+ memcpy(&c, r, sizeof(struct category64_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(c);
+
+ outs() << " name ";
+ sym_name = get_symbol_64(offset + offsetof(struct category64_t, name), S,
+ info, n_value, c.name);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (c.name != 0)
+ outs() << " + " << format("0x%" PRIx64, c.name);
+ } else
+ outs() << format("0x%" PRIx64, c.name);
+ name = get_pointer_64(c.name + n_value, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ outs() << " cls ";
+ sym_name = get_symbol_64(offset + offsetof(struct category64_t, cls), S, info,
+ n_value, c.cls);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (c.cls != 0)
+ outs() << " + " << format("0x%" PRIx64, c.cls);
+ } else
+ outs() << format("0x%" PRIx64, c.cls);
+ outs() << "\n";
+ if (c.cls + n_value != 0)
+ print_class64_t(c.cls + n_value, info);
+
+ outs() << " instanceMethods ";
+ sym_name =
+ get_symbol_64(offset + offsetof(struct category64_t, instanceMethods), S,
+ info, n_value, c.instanceMethods);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (c.instanceMethods != 0)
+ outs() << " + " << format("0x%" PRIx64, c.instanceMethods);
+ } else
+ outs() << format("0x%" PRIx64, c.instanceMethods);
+ outs() << "\n";
+ if (c.instanceMethods + n_value != 0)
+ print_method_list64_t(c.instanceMethods + n_value, info, "");
+
+ outs() << " classMethods ";
+ sym_name = get_symbol_64(offset + offsetof(struct category64_t, classMethods),
+ S, info, n_value, c.classMethods);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (c.classMethods != 0)
+ outs() << " + " << format("0x%" PRIx64, c.classMethods);
+ } else
+ outs() << format("0x%" PRIx64, c.classMethods);
+ outs() << "\n";
+ if (c.classMethods + n_value != 0)
+ print_method_list64_t(c.classMethods + n_value, info, "");
+
+ outs() << " protocols ";
+ sym_name = get_symbol_64(offset + offsetof(struct category64_t, protocols), S,
+ info, n_value, c.protocols);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (c.protocols != 0)
+ outs() << " + " << format("0x%" PRIx64, c.protocols);
+ } else
+ outs() << format("0x%" PRIx64, c.protocols);
+ outs() << "\n";
+ if (c.protocols + n_value != 0)
+ print_protocol_list64_t(c.protocols + n_value, info);
+
+ outs() << "instanceProperties ";
+ sym_name =
+ get_symbol_64(offset + offsetof(struct category64_t, instanceProperties),
+ S, info, n_value, c.instanceProperties);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (c.instanceProperties != 0)
+ outs() << " + " << format("0x%" PRIx64, c.instanceProperties);
+ } else
+ outs() << format("0x%" PRIx64, c.instanceProperties);
+ outs() << "\n";
+ if (c.instanceProperties + n_value != 0)
+ print_objc_property_list64(c.instanceProperties + n_value, info);
+}
+
+static void print_category32_t(uint32_t p, struct DisassembleInfo *info) {
+ struct category32_t c;
+ const char *r;
+ uint32_t offset, left;
+ SectionRef S, xS;
+ const char *name;
+
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&c, '\0', sizeof(struct category32_t));
+ if (left < sizeof(struct category32_t)) {
+ memcpy(&c, r, left);
+ outs() << " (category_t entends past the end of the section)\n";
+ } else
+ memcpy(&c, r, sizeof(struct category32_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(c);
+
+ outs() << " name " << format("0x%" PRIx32, c.name);
+ name = get_symbol_32(offset + offsetof(struct category32_t, name), S, info,
+ c.name);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ outs() << " cls " << format("0x%" PRIx32, c.cls) << "\n";
+ if (c.cls != 0)
+ print_class32_t(c.cls, info);
+ outs() << " instanceMethods " << format("0x%" PRIx32, c.instanceMethods)
+ << "\n";
+ if (c.instanceMethods != 0)
+ print_method_list32_t(c.instanceMethods, info, "");
+ outs() << " classMethods " << format("0x%" PRIx32, c.classMethods)
+ << "\n";
+ if (c.classMethods != 0)
+ print_method_list32_t(c.classMethods, info, "");
+ outs() << " protocols " << format("0x%" PRIx32, c.protocols) << "\n";
+ if (c.protocols != 0)
+ print_protocol_list32_t(c.protocols, info);
+ outs() << "instanceProperties " << format("0x%" PRIx32, c.instanceProperties)
+ << "\n";
+ if (c.instanceProperties != 0)
+ print_objc_property_list32(c.instanceProperties, info);
+}
+
+static void print_message_refs64(SectionRef S, struct DisassembleInfo *info) {
+ uint32_t i, left, offset, xoffset;
+ uint64_t p, n_value;
+ struct message_ref64 mr;
+ const char *name, *sym_name;
+ const char *r;
+ SectionRef xS;
+
+ if (S == SectionRef())
+ return;
+
+ StringRef SectName;
+ S.getName(SectName);
+ DataRefImpl Ref = S.getRawDataRefImpl();
+ StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
+ outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
+ offset = 0;
+ for (i = 0; i < S.getSize(); i += sizeof(struct message_ref64)) {
+ p = S.getAddress() + i;
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&mr, '\0', sizeof(struct message_ref64));
+ if (left < sizeof(struct message_ref64)) {
+ memcpy(&mr, r, left);
+ outs() << " (message_ref entends past the end of the section)\n";
+ } else
+ memcpy(&mr, r, sizeof(struct message_ref64));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(mr);
+
+ outs() << " imp ";
+ name = get_symbol_64(offset + offsetof(struct message_ref64, imp), S, info,
+ n_value, mr.imp);
+ if (n_value != 0) {
+ outs() << format("0x%" PRIx64, n_value) << " ";
+ if (mr.imp != 0)
+ outs() << "+ " << format("0x%" PRIx64, mr.imp) << " ";
+ } else
+ outs() << format("0x%" PRIx64, mr.imp) << " ";
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ outs() << " sel ";
+ sym_name = get_symbol_64(offset + offsetof(struct message_ref64, sel), S,
+ info, n_value, mr.sel);
+ if (n_value != 0) {
+ if (info->verbose && sym_name != nullptr)
+ outs() << sym_name;
+ else
+ outs() << format("0x%" PRIx64, n_value);
+ if (mr.sel != 0)
+ outs() << " + " << format("0x%" PRIx64, mr.sel);
+ } else
+ outs() << format("0x%" PRIx64, mr.sel);
+ name = get_pointer_64(mr.sel + n_value, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << format(" %.*s", left, name);
+ outs() << "\n";
+
+ offset += sizeof(struct message_ref64);
+ }
+}
+
+static void print_message_refs32(SectionRef S, struct DisassembleInfo *info) {
+ uint32_t i, left, offset, xoffset, p;
+ struct message_ref32 mr;
+ const char *name, *r;
+ SectionRef xS;
+
+ if (S == SectionRef())
+ return;
+
+ StringRef SectName;
+ S.getName(SectName);
+ DataRefImpl Ref = S.getRawDataRefImpl();
+ StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
+ outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
+ offset = 0;
+ for (i = 0; i < S.getSize(); i += sizeof(struct message_ref32)) {
+ p = S.getAddress() + i;
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&mr, '\0', sizeof(struct message_ref32));
+ if (left < sizeof(struct message_ref32)) {
+ memcpy(&mr, r, left);
+ outs() << " (message_ref entends past the end of the section)\n";
+ } else
+ memcpy(&mr, r, sizeof(struct message_ref32));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(mr);
+
+ outs() << " imp " << format("0x%" PRIx32, mr.imp);
+ name = get_symbol_32(offset + offsetof(struct message_ref32, imp), S, info,
+ mr.imp);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ outs() << " sel " << format("0x%" PRIx32, mr.sel);
+ name = get_pointer_32(mr.sel, xoffset, left, xS, info);
+ if (name != nullptr)
+ outs() << " " << name;
+ outs() << "\n";
+
+ offset += sizeof(struct message_ref32);
+ }
+}
+
+static void print_image_info64(SectionRef S, struct DisassembleInfo *info) {
+ uint32_t left, offset, swift_version;
+ uint64_t p;
+ struct objc_image_info64 o;
+ const char *r;
+
+ StringRef SectName;
+ S.getName(SectName);
+ DataRefImpl Ref = S.getRawDataRefImpl();
+ StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
+ outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
+ p = S.getAddress();
+ r = get_pointer_64(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&o, '\0', sizeof(struct objc_image_info64));
+ if (left < sizeof(struct objc_image_info64)) {
+ memcpy(&o, r, left);
+ outs() << " (objc_image_info entends past the end of the section)\n";
+ } else
+ memcpy(&o, r, sizeof(struct objc_image_info64));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(o);
+ outs() << " version " << o.version << "\n";
+ outs() << " flags " << format("0x%" PRIx32, o.flags);
+ if (o.flags & OBJC_IMAGE_IS_REPLACEMENT)
+ outs() << " OBJC_IMAGE_IS_REPLACEMENT";
+ if (o.flags & OBJC_IMAGE_SUPPORTS_GC)
+ outs() << " OBJC_IMAGE_SUPPORTS_GC";
+ swift_version = (o.flags >> 8) & 0xff;
+ if (swift_version != 0) {
+ if (swift_version == 1)
+ outs() << " Swift 1.0";
+ else if (swift_version == 2)
+ outs() << " Swift 1.1";
+ else
+ outs() << " unknown future Swift version (" << swift_version << ")";
+ }
+ outs() << "\n";
+}
+
+static void print_image_info32(SectionRef S, struct DisassembleInfo *info) {
+ uint32_t left, offset, swift_version, p;
+ struct objc_image_info32 o;
+ const char *r;
+
+ StringRef SectName;
+ S.getName(SectName);
+ DataRefImpl Ref = S.getRawDataRefImpl();
+ StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
+ outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
+ p = S.getAddress();
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&o, '\0', sizeof(struct objc_image_info32));
+ if (left < sizeof(struct objc_image_info32)) {
+ memcpy(&o, r, left);
+ outs() << " (objc_image_info entends past the end of the section)\n";
+ } else
+ memcpy(&o, r, sizeof(struct objc_image_info32));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(o);
+ outs() << " version " << o.version << "\n";
+ outs() << " flags " << format("0x%" PRIx32, o.flags);
+ if (o.flags & OBJC_IMAGE_IS_REPLACEMENT)
+ outs() << " OBJC_IMAGE_IS_REPLACEMENT";
+ if (o.flags & OBJC_IMAGE_SUPPORTS_GC)
+ outs() << " OBJC_IMAGE_SUPPORTS_GC";
+ swift_version = (o.flags >> 8) & 0xff;
+ if (swift_version != 0) {
+ if (swift_version == 1)
+ outs() << " Swift 1.0";
+ else if (swift_version == 2)
+ outs() << " Swift 1.1";
+ else
+ outs() << " unknown future Swift version (" << swift_version << ")";
+ }
+ outs() << "\n";
+}
+
+static void print_image_info(SectionRef S, struct DisassembleInfo *info) {
+ uint32_t left, offset, p;
+ struct imageInfo_t o;
+ const char *r;
+
+ StringRef SectName;
+ S.getName(SectName);
+ DataRefImpl Ref = S.getRawDataRefImpl();
+ StringRef SegName = info->O->getSectionFinalSegmentName(Ref);
+ outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
+ p = S.getAddress();
+ r = get_pointer_32(p, offset, left, S, info);
+ if (r == nullptr)
+ return;
+ memset(&o, '\0', sizeof(struct imageInfo_t));
+ if (left < sizeof(struct imageInfo_t)) {
+ memcpy(&o, r, left);
+ outs() << " (imageInfo entends past the end of the section)\n";
+ } else
+ memcpy(&o, r, sizeof(struct imageInfo_t));
+ if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(o);
+ outs() << " version " << o.version << "\n";
+ outs() << " flags " << format("0x%" PRIx32, o.flags);
+ if (o.flags & 0x1)
+ outs() << " F&C";
+ if (o.flags & 0x2)
+ outs() << " GC";
+ if (o.flags & 0x4)
+ outs() << " GC-only";
+ else
+ outs() << " RR";
+ outs() << "\n";
+}
+
+static void printObjc2_64bit_MetaData(MachOObjectFile *O, bool verbose) {
+ SymbolAddressMap AddrMap;
+ if (verbose)
+ CreateSymbolAddressMap(O, &AddrMap);
+
+ std::vector<SectionRef> Sections;
+ for (const SectionRef &Section : O->sections()) {
+ StringRef SectName;
+ Section.getName(SectName);
+ Sections.push_back(Section);
+ }
+
+ struct DisassembleInfo info;
+ // Set up the block of info used by the Symbolizer callbacks.
+ info.verbose = verbose;
+ info.O = O;
+ info.AddrMap = &AddrMap;
+ info.Sections = &Sections;
+ info.class_name = nullptr;
+ info.selector_name = nullptr;
+ info.method = nullptr;
+ info.demangled_name = nullptr;
+ info.bindtable = nullptr;
+ info.adrp_addr = 0;
+ info.adrp_inst = 0;
+
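+ // Each kind of metadata is looked up first by its __OBJC2 segment section
+ // name, falling back to the equivalent section name in the __DATA segment.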
+ const SectionRef CL = get_section(O, "__OBJC2", "__class_list");
+ if (CL != SectionRef()) {
+ info.S = CL;
+ walk_pointer_list_64("class", CL, O, &info, print_class64_t);
+ } else {
+ const SectionRef CL = get_section(O, "__DATA", "__objc_classlist");
+ info.S = CL;
+ walk_pointer_list_64("class", CL, O, &info, print_class64_t);
+ }
+
+ const SectionRef CR = get_section(O, "__OBJC2", "__class_refs");
+ if (CR != SectionRef()) {
+ info.S = CR;
+ walk_pointer_list_64("class refs", CR, O, &info, nullptr);
+ } else {
+ const SectionRef CR = get_section(O, "__DATA", "__objc_classrefs");
+ info.S = CR;
+ walk_pointer_list_64("class refs", CR, O, &info, nullptr);
+ }
+
+ const SectionRef SR = get_section(O, "__OBJC2", "__super_refs");
+ if (SR != SectionRef()) {
+ info.S = SR;
+ walk_pointer_list_64("super refs", SR, O, &info, nullptr);
+ } else {
+ const SectionRef SR = get_section(O, "__DATA", "__objc_superrefs");
+ info.S = SR;
+ walk_pointer_list_64("super refs", SR, O, &info, nullptr);
+ }
+
+ const SectionRef CA = get_section(O, "__OBJC2", "__category_list");
+ if (CA != SectionRef()) {
+ info.S = CA;
+ walk_pointer_list_64("category", CA, O, &info, print_category64_t);
+ } else {
+ const SectionRef CA = get_section(O, "__DATA", "__objc_catlist");
+ info.S = CA;
+ walk_pointer_list_64("category", CA, O, &info, print_category64_t);
+ }
+
+ const SectionRef PL = get_section(O, "__OBJC2", "__protocol_list");
+ if (PL != SectionRef()) {
+ info.S = PL;
+ walk_pointer_list_64("protocol", PL, O, &info, nullptr);
+ } else {
+ const SectionRef PL = get_section(O, "__DATA", "__objc_protolist");
+ info.S = PL;
+ walk_pointer_list_64("protocol", PL, O, &info, nullptr);
+ }
+
+ const SectionRef MR = get_section(O, "__OBJC2", "__message_refs");
+ if (MR != SectionRef()) {
+ info.S = MR;
+ print_message_refs64(MR, &info);
+ } else {
+ const SectionRef MR = get_section(O, "__DATA", "__objc_msgrefs");
+ info.S = MR;
+ print_message_refs64(MR, &info);
+ }
+
+ const SectionRef II = get_section(O, "__OBJC2", "__image_info");
+ if (II != SectionRef()) {
+ info.S = II;
+ print_image_info64(II, &info);
+ } else {
+ const SectionRef II = get_section(O, "__DATA", "__objc_imageinfo");
+ info.S = II;
+ print_image_info64(II, &info);
+ }
+
+ if (info.bindtable != nullptr)
+ delete info.bindtable;
+}
+
+static void printObjc2_32bit_MetaData(MachOObjectFile *O, bool verbose) {
+ SymbolAddressMap AddrMap;
+ if (verbose)
+ CreateSymbolAddressMap(O, &AddrMap);
+
+ std::vector<SectionRef> Sections;
+ for (const SectionRef &Section : O->sections()) {
+ StringRef SectName;
+ Section.getName(SectName);
+ Sections.push_back(Section);
+ }
+
+ struct DisassembleInfo info;
+ // Set up the block of info used by the Symbolizer callbacks.
+ info.verbose = verbose;
+ info.O = O;
+ info.AddrMap = &AddrMap;
+ info.Sections = &Sections;
+ info.class_name = nullptr;
+ info.selector_name = nullptr;
+ info.method = nullptr;
+ info.demangled_name = nullptr;
+ info.bindtable = nullptr;
+ info.adrp_addr = 0;
+ info.adrp_inst = 0;
+
+ const SectionRef CL = get_section(O, "__OBJC2", "__class_list");
+ if (CL != SectionRef()) {
+ info.S = CL;
+ walk_pointer_list_32("class", CL, O, &info, print_class32_t);
+ } else {
+ const SectionRef CL = get_section(O, "__DATA", "__objc_classlist");
+ info.S = CL;
+ walk_pointer_list_32("class", CL, O, &info, print_class32_t);
+ }
+
+ const SectionRef CR = get_section(O, "__OBJC2", "__class_refs");
+ if (CR != SectionRef()) {
+ info.S = CR;
+ walk_pointer_list_32("class refs", CR, O, &info, nullptr);
+ } else {
+ const SectionRef CR = get_section(O, "__DATA", "__objc_classrefs");
+ info.S = CR;
+ walk_pointer_list_32("class refs", CR, O, &info, nullptr);
+ }
+
+ const SectionRef SR = get_section(O, "__OBJC2", "__super_refs");
+ if (SR != SectionRef()) {
+ info.S = SR;
+ walk_pointer_list_32("super refs", SR, O, &info, nullptr);
+ } else {
+ const SectionRef SR = get_section(O, "__DATA", "__objc_superrefs");
+ info.S = SR;
+ walk_pointer_list_32("super refs", SR, O, &info, nullptr);
+ }
+
+ const SectionRef CA = get_section(O, "__OBJC2", "__category_list");
+ if (CA != SectionRef()) {
+ info.S = CA;
+ walk_pointer_list_32("category", CA, O, &info, print_category32_t);
+ } else {
+ const SectionRef CA = get_section(O, "__DATA", "__objc_catlist");
+ info.S = CA;
+ walk_pointer_list_32("category", CA, O, &info, print_category32_t);
+ }
+
+ const SectionRef PL = get_section(O, "__OBJC2", "__protocol_list");
+ if (PL != SectionRef()) {
+ info.S = PL;
+ walk_pointer_list_32("protocol", PL, O, &info, nullptr);
+ } else {
+ const SectionRef PL = get_section(O, "__DATA", "__objc_protolist");
+ info.S = PL;
+ walk_pointer_list_32("protocol", PL, O, &info, nullptr);
+ }
+
+ const SectionRef MR = get_section(O, "__OBJC2", "__message_refs");
+ if (MR != SectionRef()) {
+ info.S = MR;
+ print_message_refs32(MR, &info);
+ } else {
+ const SectionRef MR = get_section(O, "__DATA", "__objc_msgrefs");
+ info.S = MR;
+ print_message_refs32(MR, &info);
+ }
+
+ const SectionRef II = get_section(O, "__OBJC2", "__image_info");
+ if (II != SectionRef()) {
+ info.S = II;
+ print_image_info32(II, &info);
+ } else {
+ const SectionRef II = get_section(O, "__DATA", "__objc_imageinfo");
+ info.S = II;
+ print_image_info32(II, &info);
+ }
+}
+
+static bool printObjc1_32bit_MetaData(MachOObjectFile *O, bool verbose) {
+ uint32_t i, j, p, offset, xoffset, left, defs_left, def;
+ const char *r, *name, *defs;
+ struct objc_module_t module;
+ SectionRef S, xS;
+ struct objc_symtab_t symtab;
+ struct objc_class_t objc_class;
+ struct objc_category_t objc_category;
+
+ outs() << "Objective-C segment\n";
+ S = get_section(O, "__OBJC", "__module_info");
+ if (S == SectionRef())
+ return false;
+
+ SymbolAddressMap AddrMap;
+ if (verbose)
+ CreateSymbolAddressMap(O, &AddrMap);
+
+ std::vector<SectionRef> Sections;
+ for (const SectionRef &Section : O->sections()) {
+ StringRef SectName;
+ Section.getName(SectName);
+ Sections.push_back(Section);
+ }
+
+ struct DisassembleInfo info;
+ // Set up the block of info used by the Symbolizer callbacks.
+ info.verbose = verbose;
+ info.O = O;
+ info.AddrMap = &AddrMap;
+ info.Sections = &Sections;
+ info.class_name = nullptr;
+ info.selector_name = nullptr;
+ info.method = nullptr;
+ info.demangled_name = nullptr;
+ info.bindtable = nullptr;
+ info.adrp_addr = 0;
+ info.adrp_inst = 0;
+
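+ // Walk the __module_info section as an array of objc_module_t structs and
+ // dump each module's symtab along with its class and category definitions.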
+ for (i = 0; i < S.getSize(); i += sizeof(struct objc_module_t)) {
+ p = S.getAddress() + i;
+ r = get_pointer_32(p, offset, left, S, &info, true);
+ if (r == nullptr)
+ return true;
+ memset(&module, '\0', sizeof(struct objc_module_t));
+ if (left < sizeof(struct objc_module_t)) {
+ memcpy(&module, r, left);
+ outs() << " (module extends past end of __module_info section)\n";
+ } else
+ memcpy(&module, r, sizeof(struct objc_module_t));
+ if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(module);
+
+ outs() << "Module " << format("0x%" PRIx32, p) << "\n";
+ outs() << " version " << module.version << "\n";
+ outs() << " size " << module.size << "\n";
+ outs() << " name ";
+ name = get_pointer_32(module.name, xoffset, left, xS, &info, true);
+ if (name != nullptr)
+ outs() << format("%.*s", left, name);
+ else
+ outs() << format("0x%08" PRIx32, module.name)
+ << "(not in an __OBJC section)";
+ outs() << "\n";
+
+ r = get_pointer_32(module.symtab, xoffset, left, xS, &info, true);
+ if (module.symtab == 0 || r == nullptr) {
+ outs() << " symtab " << format("0x%08" PRIx32, module.symtab)
+ << " (not in an __OBJC section)\n";
+ continue;
+ }
+ outs() << " symtab " << format("0x%08" PRIx32, module.symtab) << "\n";
+ memset(&symtab, '\0', sizeof(struct objc_symtab_t));
+ defs_left = 0;
+ defs = nullptr;
+ if (left < sizeof(struct objc_symtab_t)) {
+ memcpy(&symtab, r, left);
+ outs() << "\tsymtab extends past end of an __OBJC section)\n";
+ } else {
+ memcpy(&symtab, r, sizeof(struct objc_symtab_t));
+ if (left > sizeof(struct objc_symtab_t)) {
+ defs_left = left - sizeof(struct objc_symtab_t);
+ defs = r + sizeof(struct objc_symtab_t);
+ }
+ }
+ if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(symtab);
+
+ outs() << "\tsel_ref_cnt " << symtab.sel_ref_cnt << "\n";
+ r = get_pointer_32(symtab.refs, xoffset, left, xS, &info, true);
+ outs() << "\trefs " << format("0x%08" PRIx32, symtab.refs);
+ if (r == nullptr)
+ outs() << " (not in an __OBJC section)";
+ outs() << "\n";
+ outs() << "\tcls_def_cnt " << symtab.cls_def_cnt << "\n";
+ outs() << "\tcat_def_cnt " << symtab.cat_def_cnt << "\n";
+ if (symtab.cls_def_cnt > 0)
+ outs() << "\tClass Definitions\n";
+ for (j = 0; j < symtab.cls_def_cnt; j++) {
+ if ((j + 1) * sizeof(uint32_t) > defs_left) {
+ outs() << "\t(remaining class defs entries entends past the end of the "
+ << "section)\n";
+ break;
+ }
+ memcpy(&def, defs + j * sizeof(uint32_t), sizeof(uint32_t));
+ if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(def);
+
+ r = get_pointer_32(def, xoffset, left, xS, &info, true);
+ outs() << "\tdefs[" << j << "] " << format("0x%08" PRIx32, def);
+ if (r != nullptr) {
+ if (left > sizeof(struct objc_class_t)) {
+ outs() << "\n";
+ memcpy(&objc_class, r, sizeof(struct objc_class_t));
+ } else {
+ outs() << " (entends past the end of the section)\n";
+ memset(&objc_class, '\0', sizeof(struct objc_class_t));
+ memcpy(&objc_class, r, left);
+ }
+ if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(objc_class);
+ print_objc_class_t(&objc_class, &info);
+ } else {
+ outs() << "(not in an __OBJC section)\n";
+ }
+
+ if (CLS_GETINFO(&objc_class, CLS_CLASS)) {
+ outs() << "\tMeta Class";
+ r = get_pointer_32(objc_class.isa, xoffset, left, xS, &info, true);
+ if (r != nullptr) {
+ if (left > sizeof(struct objc_class_t)) {
+ outs() << "\n";
+ memcpy(&objc_class, r, sizeof(struct objc_class_t));
+ } else {
+ outs() << " (entends past the end of the section)\n";
+ memset(&objc_class, '\0', sizeof(struct objc_class_t));
+ memcpy(&objc_class, r, left);
+ }
+ if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(objc_class);
+ print_objc_class_t(&objc_class, &info);
+ } else {
+ outs() << "(not in an __OBJC section)\n";
+ }
+ }
+ }
+ if (symtab.cat_def_cnt > 0)
+ outs() << "\tCategory Definitions\n";
+ for (j = 0; j < symtab.cat_def_cnt; j++) {
+ if ((j + symtab.cls_def_cnt + 1) * sizeof(uint32_t) > defs_left) {
+ outs() << "\t(remaining category defs entries entends past the end of "
+ << "the section)\n";
+ break;
+ }
+ memcpy(&def, defs + (j + symtab.cls_def_cnt) * sizeof(uint32_t),
+ sizeof(uint32_t));
+ if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(def);
+
+ r = get_pointer_32(def, xoffset, left, xS, &info, true);
+ outs() << "\tdefs[" << j + symtab.cls_def_cnt << "] "
+ << format("0x%08" PRIx32, def);
+ if (r != nullptr) {
+ if (left > sizeof(struct objc_category_t)) {
+ outs() << "\n";
+ memcpy(&objc_category, r, sizeof(struct objc_category_t));
+ } else {
+ outs() << " (entends past the end of the section)\n";
+ memset(&objc_category, '\0', sizeof(struct objc_category_t));
+ memcpy(&objc_category, r, left);
+ }
+ if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(objc_category);
+ print_objc_objc_category_t(&objc_category, &info);
+ } else {
+ outs() << "(not in an __OBJC section)\n";
+ }
+ }
+ }
+ const SectionRef II = get_section(O, "__OBJC", "__image_info");
+ if (II != SectionRef())
+ print_image_info(II, &info);
+
+ return true;
+}
+
+static void DumpProtocolSection(MachOObjectFile *O, const char *sect,
+ uint32_t size, uint32_t addr) {
+ SymbolAddressMap AddrMap;
+ CreateSymbolAddressMap(O, &AddrMap);
+
+ std::vector<SectionRef> Sections;
+ for (const SectionRef &Section : O->sections()) {
+ StringRef SectName;
+ Section.getName(SectName);
+ Sections.push_back(Section);
+ }
+
+ struct DisassembleInfo info;
+ // Set up the block of info used by the Symbolizer callbacks.
+ info.verbose = true;
+ info.O = O;
+ info.AddrMap = &AddrMap;
+ info.Sections = &Sections;
+ info.class_name = nullptr;
+ info.selector_name = nullptr;
+ info.method = nullptr;
+ info.demangled_name = nullptr;
+ info.bindtable = nullptr;
+ info.adrp_addr = 0;
+ info.adrp_inst = 0;
+
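+ // The caller passes the raw __protocol section contents; treat it as an
+ // array of objc_protocol_t structs and print each one at its address.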
+ const char *p;
+ struct objc_protocol_t protocol;
+ uint32_t left, paddr;
+ for (p = sect; p < sect + size; p += sizeof(struct objc_protocol_t)) {
+ memset(&protocol, '\0', sizeof(struct objc_protocol_t));
+ left = size - (p - sect);
+ if (left < sizeof(struct objc_protocol_t)) {
+ outs() << "Protocol extends past end of __protocol section\n";
+ memcpy(&protocol, p, left);
+ } else
+ memcpy(&protocol, p, sizeof(struct objc_protocol_t));
+ if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ swapStruct(protocol);
+ paddr = addr + (p - sect);
+ outs() << "Protocol " << format("0x%" PRIx32, paddr);
+ if (print_protocol(paddr, 0, &info))
+ outs() << "(not in an __OBJC section)\n";
+ }
+}
+
+static void printObjcMetaData(MachOObjectFile *O, bool verbose) {
+ if (O->is64Bit())
+ printObjc2_64bit_MetaData(O, verbose);
+ else {
+ MachO::mach_header H;
+ H = O->getHeader();
+ if (H.cputype == MachO::CPU_TYPE_ARM)
+ printObjc2_32bit_MetaData(O, verbose);
+ else {
+ // This is the 32-bit non-ARM cputype case, which normally uses the
+ // first Objective-C ABI. However, a binary built for the iOS simulator
+ // uses the second Objective-C ABI; in that case
+ // printObjc1_32bit_MetaData() will detect that and return false.
+ if (printObjc1_32bit_MetaData(O, verbose) == false)
+ printObjc2_32bit_MetaData(O, verbose);
+ }
+ }
+}
+
// GuessLiteralPointer returns a string for the item in the Mach-O file at the
// address passed in as ReferenceValue, for printing as a comment with the
// instruction, and also returns the corresponding type of that item
@@ -3022,7 +5997,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
}
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
- AsmPrinterVariant, *AsmInfo, *InstrInfo, *MRI, *STI));
+ Triple(TripleName), AsmPrinterVariant, *AsmInfo, *InstrInfo, *MRI));
// Set the display preference for hex vs. decimal immediates.
IP->setPrintImmHex(PrintImmHex);
// Comment stream and backing vector.
@@ -3070,8 +6045,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
}
int ThumbAsmPrinterVariant = ThumbAsmInfo->getAssemblerDialect();
ThumbIP.reset(ThumbTarget->createMCInstPrinter(
- ThumbAsmPrinterVariant, *ThumbAsmInfo, *ThumbInstrInfo, *ThumbMRI,
- *ThumbSTI));
+ Triple(ThumbTripleName), ThumbAsmPrinterVariant, *ThumbAsmInfo,
+ *ThumbInstrInfo, *ThumbMRI));
// Set the display preference for hex vs. decimal immediates.
ThumbIP->setPrintImmHex(PrintImmHex);
}
@@ -3337,9 +6312,9 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
Annotations.flush();
StringRef AnnotationsStr = Annotations.str();
if (isThumb)
- ThumbIP->printInst(&Inst, FormattedOS, AnnotationsStr);
+ ThumbIP->printInst(&Inst, FormattedOS, AnnotationsStr, *ThumbSTI);
else
- IP->printInst(&Inst, FormattedOS, AnnotationsStr);
+ IP->printInst(&Inst, FormattedOS, AnnotationsStr, *STI);
emitComments(CommentStream, CommentsToEmit, FormattedOS, *AsmInfo);
// Print debug info.
@@ -3398,7 +6373,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
outs() << "\t";
DumpBytes(ArrayRef<uint8_t>(Bytes.data() + Index, InstSize));
}
- IP->printInst(&Inst, outs(), "");
+ IP->printInst(&Inst, outs(), "", *STI);
outs() << "\n";
} else {
unsigned int Arch = MachOOF->getArch();
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 7bec062..71de8ad 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -274,7 +274,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
- AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
+ Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
if (!IP) {
errs() << "error: no instruction printer for target " << TripleName
<< '\n';
@@ -401,7 +401,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
outs() << "\t";
DumpBytes(ArrayRef<uint8_t>(Bytes.data() + Index, Size));
}
- IP->printInst(&Inst, outs(), "");
+ IP->printInst(&Inst, outs(), "", *STI);
outs() << CommentStream.str();
Comments.clear();
outs() << "\n";
@@ -896,6 +896,7 @@ int main(int argc, char **argv) {
&& !(InfoPlist && MachOOpt)
&& !(DylibsUsed && MachOOpt)
&& !(DylibId && MachOOpt)
+ && !(ObjcMetaData && MachOOpt)
&& !(DumpSections.size() != 0 && MachOOpt)) {
cl::PrintHelpMessage();
return 2;
diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h
index c1d5ff8..bde72e0 100644
--- a/tools/llvm-objdump/llvm-objdump.h
+++ b/tools/llvm-objdump/llvm-objdump.h
@@ -1,4 +1,3 @@
-//===-- llvm-objdump.h ----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -27,6 +26,7 @@ extern cl::opt<std::string> ArchName;
extern cl::opt<std::string> MCPU;
extern cl::list<std::string> MAttrs;
extern cl::list<std::string> DumpSections;
+extern cl::opt<bool> Raw;
extern cl::opt<bool> Disassemble;
extern cl::opt<bool> NoShowRawInsn;
extern cl::opt<bool> PrivateHeaders;
@@ -43,6 +43,7 @@ extern cl::opt<bool> LinkOptHints;
extern cl::opt<bool> InfoPlist;
extern cl::opt<bool> DylibsUsed;
extern cl::opt<bool> DylibId;
+extern cl::opt<bool> ObjcMetaData;
extern cl::opt<std::string> DisSymName;
extern cl::opt<bool> NonVerbose;
extern cl::opt<bool> Relocations;
diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp
index 32db723..7e6ce49 100644
--- a/tools/llvm-readobj/llvm-readobj.cpp
+++ b/tools/llvm-readobj/llvm-readobj.cpp
@@ -325,13 +325,12 @@ static void dumpArchive(const Archive *Arc) {
static void dumpMachOUniversalBinary(const MachOUniversalBinary *UBinary) {
for (const MachOUniversalBinary::ObjectForArch &Obj : UBinary->objects()) {
ErrorOr<std::unique_ptr<MachOObjectFile>> ObjOrErr = Obj.getAsObjectFile();
- if (std::error_code EC = ObjOrErr.getError()) {
- reportError(UBinary->getFileName(), EC.message());
- continue;
- }
-
- if (MachOObjectFile *MachOObj = ObjOrErr.get().get())
- dumpObject(MachOObj);
+ if (ObjOrErr)
+ dumpObject(&*ObjOrErr.get());
+ else if (ErrorOr<std::unique_ptr<Archive>> AOrErr = Obj.getAsArchive())
+ dumpArchive(&*AOrErr.get());
+ else
+ reportError(UBinary->getFileName(), ObjOrErr.getError().message());
}
}
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 9462015..480032d 100644
--- a/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/DebugInfo/DWARF/DIContext.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/ExecutionEngine/RuntimeDyldChecker.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -196,7 +197,7 @@ static int printLineInfoForInput() {
for(unsigned i = 0, e = InputFileList.size(); i != e; ++i) {
// Instantiate a dynamic linker.
TrivialMemoryManager MemMgr;
- RuntimeDyld Dyld(&MemMgr);
+ RuntimeDyld Dyld(MemMgr, MemMgr);
// Load the input memory buffer.
@@ -264,7 +265,12 @@ static int executeInput() {
// Instantiate a dynamic linker.
TrivialMemoryManager MemMgr;
- RuntimeDyld Dyld(&MemMgr);
+ RuntimeDyld Dyld(MemMgr, MemMgr);
+
+ // FIXME: Preserve buffers until resolveRelocations time to work around a bug
+ // in RuntimeDyldELF.
+ // This fixme should be fixed ASAP. This is a very brittle workaround.
+ std::vector<std::unique_ptr<MemoryBuffer>> InputBuffers;
// If we don't have any input files, read from stdin.
if (!InputFileList.size())
@@ -282,6 +288,7 @@ static int executeInput() {
return Error("unable to create object file: '" + EC.message() + "'");
ObjectFile &Obj = **MaybeObj;
+ InputBuffers.push_back(std::move(*InputBuffer));
// Load the object file
Dyld.loadObject(Obj);
@@ -506,18 +513,23 @@ static int linkAndVerify() {
std::unique_ptr<MCInstrInfo> MII(TheTarget->createMCInstrInfo());
std::unique_ptr<MCInstPrinter> InstPrinter(
- TheTarget->createMCInstPrinter(0, *MAI, *MII, *MRI, *STI));
+ TheTarget->createMCInstPrinter(Triple(TripleName), 0, *MAI, *MII, *MRI));
// Load any dylibs requested on the command line.
loadDylibs();
// Instantiate a dynamic linker.
TrivialMemoryManager MemMgr;
- RuntimeDyld Dyld(&MemMgr);
+ RuntimeDyld Dyld(MemMgr, MemMgr);
Dyld.setProcessAllSections(true);
RuntimeDyldChecker Checker(Dyld, Disassembler.get(), InstPrinter.get(),
llvm::dbgs());
+ // FIXME: Preserve buffers until resolveRelocations time to work around a bug
+ // in RuntimeDyldELF.
+ // This fixme should be fixed ASAP. This is a very brittle workaround.
+ std::vector<std::unique_ptr<MemoryBuffer>> InputBuffers;
+
// If we don't have any input files, read from stdin.
if (!InputFileList.size())
InputFileList.push_back("-");
@@ -536,6 +548,7 @@ static int linkAndVerify() {
return Error("unable to create object file: '" + EC.message() + "'");
ObjectFile &Obj = **MaybeObj;
+ InputBuffers.push_back(std::move(*InputBuffer));
// Load the object file
Dyld.loadObject(Obj);
diff --git a/tools/llvm-shlib/CMakeLists.txt b/tools/llvm-shlib/CMakeLists.txt
index 08dafe1..bc1b658 100644
--- a/tools/llvm-shlib/CMakeLists.txt
+++ b/tools/llvm-shlib/CMakeLists.txt
@@ -42,7 +42,9 @@ set(SOURCES
libllvm.cpp
)
-if(NOT DEFINED LLVM_EXPORTED_SYMBOL_FILE)
+llvm_map_components_to_libnames(LIB_NAMES ${LLVM_DYLIB_COMPONENTS})
+
+if(NOT DEFINED LLVM_DYLIB_EXPORTED_SYMBOL_FILE)
if( WIN32 AND NOT CYGWIN )
message(FATAL_ERROR "Auto-generation not implemented for Win32 without GNU utils. Please specify LLVM_EXPORTED_SYMBOL_FILE.")
@@ -53,41 +55,52 @@ if(NOT DEFINED LLVM_EXPORTED_SYMBOL_FILE)
set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_BINARY_DIR}/libllvm.exports)
- llvm_map_components_to_libnames(LIB_NAMES ${LLVM_DYLIB_COMPONENTS})
-
- foreach (lib ${LIB_NAMES})
-
- set(LIB_DIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
- set(LIB_NAME ${LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${lib})
- set(LIB_PATH ${LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
- set(LIB_EXPORTS_PATH ${LIB_NAME}.exports)
-
- list(APPEND LLVM_DYLIB_REQUIRED_EXPORTS ${LIB_EXPORTS_PATH})
+ if (NOT LLVM_DYLIB_EXPORT_ALL)
+ foreach (lib ${LIB_NAMES})
+ set(LIB_DIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
+ set(LIB_NAME ${LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${lib})
+ set(LIB_PATH ${LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
+ set(LIB_EXPORTS_PATH ${LIB_NAME}.exports)
+ list(APPEND LLVM_DYLIB_REQUIRED_EXPORTS ${LIB_EXPORTS_PATH})
+
+
+ add_custom_command(OUTPUT ${LIB_EXPORTS_PATH}
+ COMMAND nm ${LIB_PATH} | awk "/T _LLVM/ || /T LLVM/ { print $3 }" | sort -u | sed -e "s/^_//g" > ${LIB_EXPORTS_PATH}
+ WORKING_DIRECTORY ${LIB_DIR}
+ DEPENDS ${lib}
+ COMMENT "Generating Export list for ${lib}..."
+ VERBATIM )
+ endforeach ()
+ endif()
- add_custom_command(OUTPUT ${LIB_EXPORTS_PATH}
- COMMAND nm ${LIB_PATH} | awk "/T _LLVM/ || /T LLVM/ { print $3 }" | sort -u | sed -e "s/^_//g" > ${LIB_EXPORTS_PATH}
+ if (LLVM_DYLIB_EXPORT_ALL)
+ add_custom_command(OUTPUT ${LLVM_EXPORTED_SYMBOL_FILE}
+ COMMAND echo \"LLVM*\" > ${LLVM_EXPORTED_SYMBOL_FILE} && echo \"_Z*llvm*\" >> ${LLVM_EXPORTED_SYMBOL_FILE}
WORKING_DIRECTORY ${LIB_DIR}
- DEPENDS ${lib}
- COMMENT "Generating Export list for ${lib}..."
- VERBATIM )
- endforeach ()
-
- add_custom_command(OUTPUT ${LLVM_EXPORTED_SYMBOL_FILE}
- COMMAND cat ${LLVM_DYLIB_REQUIRED_EXPORTS} > ${LLVM_EXPORTED_SYMBOL_FILE}
- WORKING_DIRECTORY ${LIB_DIR}
- DEPENDS ${LLVM_DYLIB_REQUIRED_EXPORTS}
- COMMENT "Generating combined export list...")
+ DEPENDS ${LLVM_DYLIB_REQUIRED_EXPORTS}
+ COMMENT "Generating combined export list...")
+ else()
+ add_custom_command(OUTPUT ${LLVM_EXPORTED_SYMBOL_FILE}
+ COMMAND cat ${LLVM_DYLIB_REQUIRED_EXPORTS} > ${LLVM_EXPORTED_SYMBOL_FILE}
+ WORKING_DIRECTORY ${LIB_DIR}
+ DEPENDS ${LLVM_DYLIB_REQUIRED_EXPORTS}
+ COMMENT "Generating combined export list...")
+ endif()
add_custom_target(libLLVMExports DEPENDS ${LLVM_EXPORTED_SYMBOL_FILE})
-
+else()
+ set(LLVM_EXPORTED_SYMBOL_FILE ${LLVM_DYLIB_EXPORTED_SYMBOL_FILE})
+ add_custom_target(libLLVMExports DEPENDS ${LLVM_EXPORTED_SYMBOL_FILE})
endif()
add_llvm_library(LLVM SHARED ${SOURCES})
+list(REMOVE_DUPLICATES LIB_NAMES)
if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") # FIXME: It should be "GNU ld for elf"
# GNU ld doesn't resolve symbols in the version script.
- list(REMOVE_DUPLICATES LIB_NAMES)
set(LIB_NAMES -Wl,--whole-archive ${LIB_NAMES} -Wl,--no-whole-archive)
+elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
+ set(LIB_NAMES -Wl,-all_load ${LIB_NAMES})
endif()
target_link_libraries(LLVM PRIVATE ${LIB_NAMES})
diff --git a/tools/opt/BreakpointPrinter.cpp b/tools/opt/BreakpointPrinter.cpp
index 3cbc0ae..8f390a1 100644
--- a/tools/opt/BreakpointPrinter.cpp
+++ b/tools/opt/BreakpointPrinter.cpp
@@ -30,17 +30,15 @@ struct BreakpointPrinter : public ModulePass {
BreakpointPrinter(raw_ostream &out) : ModulePass(ID), Out(out) {}
void getContextName(DIDescriptor Context, std::string &N) {
- if (Context.isNameSpace()) {
- DINameSpace NS(Context);
- if (!NS.getName().empty()) {
- getContextName(NS.getContext(), N);
- N = N + NS.getName().str() + "::";
+ if (auto *NS = dyn_cast<MDNamespace>(Context)) {
+ if (!NS->getName().empty()) {
+ getContextName(NS->getScope(), N);
+ N = N + NS->getName().str() + "::";
}
- } else if (Context.isType()) {
- DIType TY(Context);
- if (!TY.getName().empty()) {
- getContextName(TY.getContext().resolve(TypeIdentifierMap), N);
- N = N + TY.getName().str() + "::";
+ } else if (DIType TY = dyn_cast<MDType>(Context)) {
+ if (!TY->getName().empty()) {
+ getContextName(TY->getScope().resolve(TypeIdentifierMap), N);
+ N = N + TY->getName().str() + "::";
}
}
}
@@ -55,13 +53,11 @@ struct BreakpointPrinter : public ModulePass {
if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp"))
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
std::string Name;
- DISubprogram SP(NMD->getOperand(i));
- assert((!SP || SP.isSubprogram()) &&
- "A MDNode in llvm.dbg.sp should be null or a DISubprogram.");
+ auto *SP = cast_or_null<MDSubprogram>(NMD->getOperand(i));
if (!SP)
continue;
- getContextName(SP.getContext().resolve(TypeIdentifierMap), Name);
- Name = Name + SP.getDisplayName().str();
+ getContextName(SP->getScope().resolve(TypeIdentifierMap), Name);
+ Name = Name + SP->getDisplayName().str();
if (!Name.empty() && Processed.insert(Name).second) {
Out << Name << "\n";
}
diff --git a/tools/opt/NewPMDriver.cpp b/tools/opt/NewPMDriver.cpp
index 9216d5c..3030d65 100644
--- a/tools/opt/NewPMDriver.cpp
+++ b/tools/opt/NewPMDriver.cpp
@@ -39,7 +39,9 @@ static cl::opt<bool>
bool llvm::runPassPipeline(StringRef Arg0, LLVMContext &Context, Module &M,
TargetMachine *TM, tool_output_file *Out,
StringRef PassPipeline, OutputKind OK,
- VerifierKind VK) {
+ VerifierKind VK,
+ bool ShouldPreserveAssemblyUseListOrder,
+ bool ShouldPreserveBitcodeUseListOrder) {
PassBuilder PB(TM);
FunctionAnalysisManager FAM(DebugPM);
@@ -77,10 +79,12 @@ bool llvm::runPassPipeline(StringRef Arg0, LLVMContext &Context, Module &M,
case OK_NoOutput:
break; // No output pass needed.
case OK_OutputAssembly:
- MPM.addPass(PrintModulePass(Out->os()));
+ MPM.addPass(
+ PrintModulePass(Out->os(), "", ShouldPreserveAssemblyUseListOrder));
break;
case OK_OutputBitcode:
- MPM.addPass(BitcodeWriterPass(Out->os()));
+ MPM.addPass(
+ BitcodeWriterPass(Out->os(), ShouldPreserveBitcodeUseListOrder));
break;
}
diff --git a/tools/opt/NewPMDriver.h b/tools/opt/NewPMDriver.h
index 5384fe2..349a7b1 100644
--- a/tools/opt/NewPMDriver.h
+++ b/tools/opt/NewPMDriver.h
@@ -51,7 +51,9 @@ enum VerifierKind {
bool runPassPipeline(StringRef Arg0, LLVMContext &Context, Module &M,
TargetMachine *TM, tool_output_file *Out,
StringRef PassPipeline, opt_tool::OutputKind OK,
- opt_tool::VerifierKind VK);
+ opt_tool::VerifierKind VK,
+ bool ShouldPreserveAssemblyUseListOrder,
+ bool ShouldPreserveBitcodeUseListOrder);
}
#endif
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index c1e120a..80b1934 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -25,6 +25,7 @@
#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassNameParser.h"
@@ -38,6 +39,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/PrettyStackTrace.h"
@@ -178,7 +180,15 @@ DefaultDataLayout("default-data-layout",
cl::desc("data layout string to use if not specified by module"),
cl::value_desc("layout-string"), cl::init(""));
+static cl::opt<bool> PreserveBitcodeUseListOrder(
+ "preserve-bc-uselistorder",
+ cl::desc("Preserve use-list order when writing LLVM bitcode."),
+ cl::init(true), cl::Hidden);
+static cl::opt<bool> PreserveAssemblyUseListOrder(
+ "preserve-ll-uselistorder",
+ cl::desc("Preserve use-list order when writing LLVM assembly."),
+ cl::init(false), cl::Hidden);
static inline void addPass(legacy::PassManagerBase &PM, Pass *P) {
// Add the pass to the pass manager...
@@ -265,13 +275,28 @@ static TargetMachine* GetTargetMachine(Triple TheTriple) {
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
- if (MAttrs.size()) {
+ if (MAttrs.size() || MCPU == "native") {
SubtargetFeatures Features;
+
+ // If user asked for the 'native' CPU, we need to autodetect features.
+ // This is necessary for x86 where the CPU might not support all the
+ // features the autodetected CPU name lists in the target. For example,
+ // not all Sandybridge processors support AVX.
+ if (MCPU == "native") {
+ StringMap<bool> HostFeatures;
+ if (sys::getHostCPUFeatures(HostFeatures))
+ for (auto &F : HostFeatures)
+ Features.AddFeature(F.first(), F.second);
+ }
+
for (unsigned i = 0; i != MAttrs.size(); ++i)
Features.AddFeature(MAttrs[i]);
FeaturesStr = Features.getString();
}
+ if (MCPU == "native")
+ MCPU = sys::getHostCPUName();
+
return TheTarget->createTargetMachine(TheTriple.getTriple(),
MCPU, FeaturesStr,
InitTargetOptionsFromCodeGenFlags(),
@@ -345,6 +370,19 @@ int main(int argc, char **argv) {
return 1;
}
+ // Strip debug info before running the verifier.
+ if (StripDebug)
+ StripDebugInfo(*M);
+
+ // Immediately run the verifier to catch any problems before starting up the
+ // pass pipelines. Otherwise we can crash on broken code during
+ // doInitialization().
+ if (!NoVerify && verifyModule(*M, &errs())) {
+ errs() << argv[0] << ": " << InputFilename
+ << ": error: input module is broken!\n";
+ return 1;
+ }
+
// If we are supposed to override the target triple, do so now.
if (!TargetTriple.empty())
M->setTargetTriple(Triple::normalize(TargetTriple));
@@ -396,7 +434,8 @@ int main(int argc, char **argv) {
// string. Hand off the rest of the functionality to the new code for that
// layer.
return runPassPipeline(argv[0], Context, *M, TM.get(), Out.get(),
- PassPipeline, OK, VK)
+ PassPipeline, OK, VK, PreserveAssemblyUseListOrder,
+ PreserveBitcodeUseListOrder)
? 0
: 1;
}
@@ -449,10 +488,6 @@ int main(int argc, char **argv) {
NoOutput = true;
}
- // If the -strip-debug command line option was specified, add it.
- if (StripDebug)
- addPass(Passes, createStripSymbolsPass(true));
-
// Create a new optimization pass for each one specified on the command line
for (unsigned i = 0; i < PassList.size(); ++i) {
if (StandardLinkOpts &&
@@ -524,7 +559,8 @@ int main(int argc, char **argv) {
}
if (PrintEachXForm)
- Passes.add(createPrintModulePass(errs()));
+ Passes.add(
+ createPrintModulePass(errs(), "", PreserveAssemblyUseListOrder));
}
if (StandardLinkOpts) {
@@ -561,9 +597,11 @@ int main(int argc, char **argv) {
// Write bitcode or assembly to the output as the last step...
if (!NoOutput && !AnalyzeOnly) {
if (OutputAssembly)
- Passes.add(createPrintModulePass(Out->os()));
+ Passes.add(
+ createPrintModulePass(Out->os(), "", PreserveAssemblyUseListOrder));
else
- Passes.add(createBitcodeWriterPass(Out->os()));
+ Passes.add(
+ createBitcodeWriterPass(Out->os(), PreserveBitcodeUseListOrder));
}
// Before executing passes, print the final values of the LLVM options.
diff --git a/tools/verify-uselistorder/verify-uselistorder.cpp b/tools/verify-uselistorder/verify-uselistorder.cpp
index ef3d5b3..795d035 100644
--- a/tools/verify-uselistorder/verify-uselistorder.cpp
+++ b/tools/verify-uselistorder/verify-uselistorder.cpp
@@ -53,7 +53,7 @@
using namespace llvm;
-#define DEBUG_TYPE "use-list-order"
+#define DEBUG_TYPE "uselistorder"
static cl::opt<std::string> InputFilename(cl::Positional,
cl::desc("<input bitcode file>"),
@@ -109,16 +109,16 @@ struct ValueMapping {
bool TempFile::init(const std::string &Ext) {
SmallVector<char, 64> Vector;
DEBUG(dbgs() << " - create-temp-file\n");
- if (auto EC = sys::fs::createTemporaryFile("use-list-order", Ext, Vector)) {
- (void)EC;
- DEBUG(dbgs() << "error: " << EC.message() << "\n");
+ if (auto EC = sys::fs::createTemporaryFile("uselistorder", Ext, Vector)) {
+ errs() << "verify-uselistorder: error: " << EC.message() << "\n";
return true;
}
assert(!Vector.empty());
Filename.assign(Vector.data(), Vector.data() + Vector.size());
Remover.setFile(Filename, !SaveTemps);
- DEBUG(dbgs() << " - filename = " << Filename << "\n");
+ if (SaveTemps)
+ outs() << " - filename = " << Filename << "\n";
return false;
}
@@ -127,11 +127,11 @@ bool TempFile::writeBitcode(const Module &M) const {
std::error_code EC;
raw_fd_ostream OS(Filename, EC, sys::fs::F_None);
if (EC) {
- DEBUG(dbgs() << "error: " << EC.message() << "\n");
+ errs() << "verify-uselistorder: error: " << EC.message() << "\n";
return true;
}
- WriteBitcodeToFile(&M, OS);
+ WriteBitcodeToFile(&M, OS, /* ShouldPreserveUseListOrder */ true);
return false;
}
@@ -140,11 +140,11 @@ bool TempFile::writeAssembly(const Module &M) const {
std::error_code EC;
raw_fd_ostream OS(Filename, EC, sys::fs::F_Text);
if (EC) {
- DEBUG(dbgs() << "error: " << EC.message() << "\n");
+ errs() << "verify-uselistorder: error: " << EC.message() << "\n";
return true;
}
- OS << M;
+ M.print(OS, nullptr, /* ShouldPreserveUseListOrder */ true);
return false;
}
@@ -153,7 +153,8 @@ std::unique_ptr<Module> TempFile::readBitcode(LLVMContext &Context) const {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOr =
MemoryBuffer::getFile(Filename);
if (!BufferOr) {
- DEBUG(dbgs() << "error: " << BufferOr.getError().message() << "\n");
+ errs() << "verify-uselistorder: error: " << BufferOr.getError().message()
+ << "\n";
return nullptr;
}
@@ -161,7 +162,8 @@ std::unique_ptr<Module> TempFile::readBitcode(LLVMContext &Context) const {
ErrorOr<Module *> ModuleOr =
parseBitcodeFile(Buffer->getMemBufferRef(), Context);
if (!ModuleOr) {
- DEBUG(dbgs() << "error: " << ModuleOr.getError().message() << "\n");
+ errs() << "verify-uselistorder: error: " << ModuleOr.getError().message()
+ << "\n";
return nullptr;
}
return std::unique_ptr<Module>(ModuleOr.get());
@@ -172,7 +174,7 @@ std::unique_ptr<Module> TempFile::readAssembly(LLVMContext &Context) const {
SMDiagnostic Err;
std::unique_ptr<Module> M = parseAssemblyFile(Filename, Err, Context);
if (!M.get())
- DEBUG(dbgs() << "error: "; Err.print("verify-use-list-order", dbgs()));
+ Err.print("verify-uselistorder", errs());
return M;
}
@@ -343,7 +345,6 @@ static void verifyAfterRoundTrip(const Module &M,
report_fatal_error("use-list order changed");
}
static void verifyBitcodeUseListOrder(const Module &M) {
- errs() << "*** verify-use-list-order: bitcode ***\n";
TempFile F;
if (F.init("bc"))
report_fatal_error("failed to initialize bitcode file");
@@ -356,7 +357,6 @@ static void verifyBitcodeUseListOrder(const Module &M) {
}
static void verifyAssemblyUseListOrder(const Module &M) {
- errs() << "*** verify-use-list-order: assembly ***\n";
TempFile F;
if (F.init("ll"))
report_fatal_error("failed to initialize assembly file");
@@ -369,7 +369,9 @@ static void verifyAssemblyUseListOrder(const Module &M) {
}
static void verifyUseListOrder(const Module &M) {
+ outs() << "verify bitcode\n";
verifyBitcodeUseListOrder(M);
+ outs() << "verify assembly\n";
verifyAssemblyUseListOrder(M);
}
@@ -498,7 +500,6 @@ static void changeUseLists(Module &M, Changer changeValueUseList) {
}
static void shuffleUseLists(Module &M, unsigned SeedOffset) {
- errs() << "*** shuffle-use-lists ***\n";
std::minstd_rand0 Gen(std::minstd_rand0::default_seed + SeedOffset);
DenseSet<Value *> Seen;
changeUseLists(M, [&](Value *V) { shuffleValueUseLists(V, Gen, Seen); });
@@ -506,7 +507,6 @@ static void shuffleUseLists(Module &M, unsigned SeedOffset) {
}
static void reverseUseLists(Module &M) {
- errs() << "*** reverse-use-lists ***\n";
DenseSet<Value *> Seen;
changeUseLists(M, [&](Value *V) { reverseValueUseLists(V, Seen); });
DEBUG(dbgs() << "\n");
@@ -534,33 +534,29 @@ int main(int argc, char **argv) {
Err.print(argv[0], errs());
return 1;
}
- if (verifyModule(*M, &errs()))
- report_fatal_error("verification failed");
-
- errs() << "*** verify-use-list-order ***\n";
- // Can't verify if order isn't preserved.
- if (!shouldPreserveBitcodeUseListOrder()) {
- errs() << "warning: forcing -preserve-bc-use-list-order\n";
- setPreserveBitcodeUseListOrder(true);
- }
- if (!shouldPreserveAssemblyUseListOrder()) {
- errs() << "warning: forcing -preserve-ll-use-list-order\n";
- setPreserveAssemblyUseListOrder(true);
+ if (verifyModule(*M, &errs())) {
+ errs() << argv[0] << ": " << InputFilename
+ << ": error: input module is broken!\n";
+ return 1;
}
// Verify the use lists now and after reversing them.
+ outs() << "*** verify-uselistorder ***\n";
verifyUseListOrder(*M);
+ outs() << "reverse\n";
reverseUseLists(*M);
verifyUseListOrder(*M);
for (unsigned I = 0, E = NumShuffles; I != E; ++I) {
- errs() << "*** shuffle iteration: " << I + 1 << " of " << E << " ***\n";
+ outs() << "\n";
// Shuffle with a different (deterministic) seed each time.
+ outs() << "shuffle (" << I + 1 << " of " << E << ")\n";
shuffleUseLists(*M, I);
// Verify again before and after reversing.
verifyUseListOrder(*M);
+ outs() << "reverse\n";
reverseUseLists(*M);
verifyUseListOrder(*M);
}
diff --git a/tools/yaml2obj/yaml2coff.cpp b/tools/yaml2obj/yaml2coff.cpp
index 6939bc4..61d9851 100644
--- a/tools/yaml2obj/yaml2coff.cpp
+++ b/tools/yaml2obj/yaml2coff.cpp
@@ -253,10 +253,7 @@ binary_le_impl<value_type> binary_le(value_type V) {
return binary_le_impl<value_type>(V);
}
-template <size_t NumBytes>
-struct zeros_impl {
- zeros_impl() {}
-};
+template <size_t NumBytes> struct zeros_impl {};
template <size_t NumBytes>
raw_ostream &operator<<(raw_ostream &OS, const zeros_impl<NumBytes> &) {
diff --git a/unittests/ADT/DAGDeltaAlgorithmTest.cpp b/unittests/ADT/DAGDeltaAlgorithmTest.cpp
index 370b7c2..190df7f 100644
--- a/unittests/ADT/DAGDeltaAlgorithmTest.cpp
+++ b/unittests/ADT/DAGDeltaAlgorithmTest.cpp
@@ -22,7 +22,7 @@ class FixedDAGDeltaAlgorithm : public DAGDeltaAlgorithm {
unsigned NumTests;
protected:
- virtual bool ExecuteOneTest(const changeset_ty &Changes) {
+ bool ExecuteOneTest(const changeset_ty &Changes) override {
++NumTests;
return std::includes(Changes.begin(), Changes.end(),
FailingSet.begin(), FailingSet.end());
diff --git a/unittests/ADT/DeltaAlgorithmTest.cpp b/unittests/ADT/DeltaAlgorithmTest.cpp
index a33f2b4..bed57b1 100644
--- a/unittests/ADT/DeltaAlgorithmTest.cpp
+++ b/unittests/ADT/DeltaAlgorithmTest.cpp
@@ -37,7 +37,7 @@ class FixedDeltaAlgorithm final : public DeltaAlgorithm {
unsigned NumTests;
protected:
- virtual bool ExecuteOneTest(const changeset_ty &Changes) {
+ bool ExecuteOneTest(const changeset_ty &Changes) override {
++NumTests;
return std::includes(Changes.begin(), Changes.end(),
FailingSet.begin(), FailingSet.end());
diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp
index 97ff90b..46f7021 100644
--- a/unittests/ADT/SmallVectorTest.cpp
+++ b/unittests/ADT/SmallVectorTest.cpp
@@ -156,10 +156,7 @@ LLVM_ATTRIBUTE_USED void CompileTest() {
class SmallVectorTestBase : public testing::Test {
protected:
-
- void SetUp() {
- Constructable::reset();
- }
+ void SetUp() override { Constructable::reset(); }
template <typename VectorT>
void assertEmpty(VectorT & v) {
diff --git a/unittests/Analysis/AliasAnalysisTest.cpp b/unittests/Analysis/AliasAnalysisTest.cpp
new file mode 100644
index 0000000..1ba396c
--- /dev/null
+++ b/unittests/Analysis/AliasAnalysisTest.cpp
@@ -0,0 +1,94 @@
+//===--- AliasAnalysisTest.cpp - Mixed TBAA unit tests --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace {
+
+class AliasAnalysisTest : public testing::Test {
+protected:
+ AliasAnalysisTest() : M("AliasAnalysisTBAATest", C) {}
+
+ // This is going to check that calling getModRefInfo without a location, and
+ // with a default location, first, doesn't crash, and second, gives the right
+ // answer.
+ void CheckModRef(Instruction *I, AliasAnalysis::ModRefResult Result) {
+ static char ID;
+ class CheckModRefTestPass : public FunctionPass {
+ public:
+ CheckModRefTestPass(Instruction *I, AliasAnalysis::ModRefResult Result)
+ : FunctionPass(ID), ExpectResult(Result), I(I) {}
+ static int initialize() {
+ PassInfo *PI = new PassInfo("CheckModRef testing pass", "", &ID,
+ nullptr, true, true);
+ PassRegistry::getPassRegistry()->registerPass(*PI, false);
+ initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ return 0;
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AliasAnalysis>();
+ }
+ bool runOnFunction(Function &) override {
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ EXPECT_EQ(AA.getModRefInfo(I, AliasAnalysis::Location()), ExpectResult);
+ EXPECT_EQ(AA.getModRefInfo(I), ExpectResult);
+ return false;
+ }
+ AliasAnalysis::ModRefResult ExpectResult;
+ Instruction *I;
+ };
+ static int initialize = CheckModRefTestPass::initialize();
+ (void)initialize;
+ CheckModRefTestPass *P = new CheckModRefTestPass(I, Result);
+ legacy::PassManager PM;
+ PM.add(createBasicAliasAnalysisPass());
+ PM.add(P);
+ PM.run(M);
+ }
+
+ LLVMContext C;
+ Module M;
+};
+
+TEST_F(AliasAnalysisTest, getModRefInfo) {
+ // Setup function.
+ FunctionType *FTy =
+ FunctionType::get(Type::getVoidTy(C), std::vector<Type *>(), false);
+ auto *F = cast<Function>(M.getOrInsertFunction("f", FTy));
+ auto *BB = BasicBlock::Create(C, "entry", F);
+ auto IntType = Type::getInt32Ty(C);
+ auto PtrType = Type::getInt32PtrTy(C);
+ auto *Value = ConstantInt::get(IntType, 42);
+ auto *Addr = ConstantPointerNull::get(PtrType);
+
+ auto *Store1 = new StoreInst(Value, Addr, BB);
+ auto *Load1 = new LoadInst(Addr, "load", BB);
+ auto *Add1 = BinaryOperator::CreateAdd(Value, Value, "add", BB);
+
+ ReturnInst::Create(C, nullptr, BB);
+
+ // Check basic results
+ CheckModRef(Store1, AliasAnalysis::ModRefResult::Mod);
+ CheckModRef(Load1, AliasAnalysis::ModRefResult::Ref);
+ CheckModRef(Add1, AliasAnalysis::ModRefResult::NoModRef);
+}
+
+} // end anonymous namespace
+} // end llvm namespace
diff --git a/unittests/Analysis/CFGTest.cpp b/unittests/Analysis/CFGTest.cpp
index 4b4ebb6..b29c168 100644
--- a/unittests/Analysis/CFGTest.cpp
+++ b/unittests/Analysis/CFGTest.cpp
@@ -78,13 +78,13 @@ protected:
return 0;
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
}
- bool runOnFunction(Function &F) {
+ bool runOnFunction(Function &F) override {
if (!F.hasName() || F.getName() != "test")
return false;
diff --git a/unittests/Analysis/CMakeLists.txt b/unittests/Analysis/CMakeLists.txt
index baf0c28..35a6d92 100644
--- a/unittests/Analysis/CMakeLists.txt
+++ b/unittests/Analysis/CMakeLists.txt
@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
)
add_llvm_unittest(AnalysisTests
+ AliasAnalysisTest.cpp
CallGraphTest.cpp
CFGTest.cpp
LazyCallGraphTest.cpp
diff --git a/unittests/Analysis/ScalarEvolutionTest.cpp b/unittests/Analysis/ScalarEvolutionTest.cpp
index 876a264..6ce7ff4 100644
--- a/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -25,7 +25,7 @@ namespace {
class ScalarEvolutionsTest : public testing::Test {
protected:
ScalarEvolutionsTest() : M("", Context), SE(*new ScalarEvolution) {}
- ~ScalarEvolutionsTest() {
+ ~ScalarEvolutionsTest() override {
// Manually clean up, since we allocated new SCEV objects after the
// pass was finished.
SE.releaseMemory();
diff --git a/unittests/ExecutionEngine/ExecutionEngineTest.cpp b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
index 8ffc1c8..bb47c4c 100644
--- a/unittests/ExecutionEngine/ExecutionEngineTest.cpp
+++ b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
@@ -33,7 +33,7 @@ protected:
Engine.reset(EngineBuilder(std::move(Owner)).setErrorStr(&Error).create());
}
- virtual void SetUp() {
+ void SetUp() override {
ASSERT_TRUE(Engine.get() != nullptr) << "EngineBuilder returned error: '"
<< Error << "'";
}
@@ -54,6 +54,7 @@ TEST_F(ExecutionEngineTest, ForwardGlobalMapping) {
int32_t Mem1 = 3;
Engine->addGlobalMapping(G1, &Mem1);
EXPECT_EQ(&Mem1, Engine->getPointerToGlobalIfAvailable(G1));
+ EXPECT_EQ(&Mem1, Engine->getPointerToGlobalIfAvailable("Global1"));
int32_t Mem2 = 4;
Engine->updateGlobalMapping(G1, &Mem2);
EXPECT_EQ(&Mem2, Engine->getPointerToGlobalIfAvailable(G1));
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
index f2a3000..a7c9ae0 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
+++ b/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
@@ -85,13 +85,11 @@ public:
ReservedCodeSize(0), UsedCodeSize(0), ReservedDataSizeRO(0),
UsedDataSizeRO(0), ReservedDataSizeRW(0), UsedDataSizeRW(0) {
}
-
- virtual bool needsToReserveAllocationSpace() {
- return true;
- }
- virtual void reserveAllocationSpace(
- uintptr_t CodeSize, uintptr_t DataSizeRO, uintptr_t DataSizeRW) {
+ bool needsToReserveAllocationSpace() override { return true; }
+
+ void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO,
+ uintptr_t DataSizeRW) override {
ReservedCodeSize = CodeSize;
ReservedDataSizeRO = DataSizeRO;
ReservedDataSizeRW = DataSizeRW;
@@ -103,15 +101,17 @@ public:
*UsedSize = AlignedBegin + AlignedSize;
}
- virtual uint8_t* allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID, StringRef SectionName, bool IsReadOnly) {
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, StringRef SectionName,
+ bool IsReadOnly) override {
useSpace(IsReadOnly ? &UsedDataSizeRO : &UsedDataSizeRW, Size, Alignment);
return SectionMemoryManager::allocateDataSection(Size, Alignment,
SectionID, SectionName, IsReadOnly);
}
- uint8_t* allocateCodeSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID, StringRef SectionName) {
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName) override {
useSpace(&UsedCodeSize, Size, Alignment);
return SectionMemoryManager::allocateCodeSection(Size, Alignment,
SectionID, SectionName);
@@ -141,8 +141,8 @@ protected:
// that they will fail the MCJIT C API tests.
UnsupportedEnvironments.push_back(Triple::Cygnus);
}
-
- virtual void SetUp() {
+
+ void SetUp() override {
didCallAllocateCodeSection = false;
didAllocateCompactUnwindSection = false;
didCallYield = false;
@@ -151,8 +151,8 @@ protected:
Engine = nullptr;
Error = nullptr;
}
-
- virtual void TearDown() {
+
+ void TearDown() override {
if (Engine)
LLVMDisposeExecutionEngine(Engine);
else if (Module)
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITObjectCacheTest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITObjectCacheTest.cpp
index 2e38dd8..ff5b6e3 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITObjectCacheTest.cpp
+++ b/unittests/ExecutionEngine/MCJIT/MCJITObjectCacheTest.cpp
@@ -33,7 +33,7 @@ public:
ObjMap[ModuleID] = copyBuffer(Obj);
}
- virtual std::unique_ptr<MemoryBuffer> getObject(const Module* M) override {
+ std::unique_ptr<MemoryBuffer> getObject(const Module *M) override {
const MemoryBuffer* BufferFound = getObjectInternal(M);
ModulesLookedUp.insert(M->getModuleIdentifier());
if (!BufferFound)
@@ -84,7 +84,7 @@ protected:
ReplacementRC = 7
};
- virtual void SetUp() {
+ void SetUp() override {
M.reset(createEmptyModule("<main>"));
Main = insertMainFunction(M.get(), OriginalRC);
}
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
index 64d8c2f..94b9a69 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
+++ b/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
@@ -22,7 +22,7 @@ namespace {
class MCJITTest : public testing::Test, public MCJITTestBase {
protected:
- virtual void SetUp() { M.reset(createEmptyModule("<main>")); }
+ void SetUp() override { M.reset(createEmptyModule("<main>")); }
};
// FIXME: Ensure creating an execution engine does not crash when constructed
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h b/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
index 35af417..901f142 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
+++ b/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
@@ -308,11 +308,6 @@ protected:
SupportedSubArchs.push_back("armv6");
SupportedSubArchs.push_back("armv7");
- // The operating systems below are known to be incompatible with MCJIT as
- // they are copied from the test/ExecutionEngine/MCJIT/lit.local.cfg and
- // should be kept in sync.
- UnsupportedOSs.push_back(Triple::Darwin);
-
UnsupportedEnvironments.push_back(Triple::Cygnus);
}
diff --git a/unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp b/unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp
index b0ff127..a495766 100644
--- a/unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp
+++ b/unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp
@@ -14,9 +14,11 @@ namespace {
struct MockBaseLayer {
typedef int ModuleSetHandleT;
- ModuleSetHandleT addModuleSet(std::list<std::unique_ptr<llvm::Module>>,
- std::unique_ptr<llvm::RTDyldMemoryManager> x) {
- EXPECT_FALSE(x);
+ ModuleSetHandleT addModuleSet(
+ std::list<std::unique_ptr<llvm::Module>>,
+ std::unique_ptr<llvm::RuntimeDyld::MemoryManager> MemMgr,
+ std::unique_ptr<llvm::RuntimeDyld::SymbolResolver> Resolver) {
+ EXPECT_FALSE(MemMgr);
return 42;
}
};
@@ -24,7 +26,7 @@ struct MockBaseLayer {
TEST(LazyEmittingLayerTest, Empty) {
MockBaseLayer M;
llvm::orc::LazyEmittingLayer<MockBaseLayer> L(M);
- L.addModuleSet(std::list<std::unique_ptr<llvm::Module>>(), nullptr);
+ L.addModuleSet(std::list<std::unique_ptr<llvm::Module>>(), nullptr, nullptr);
}
}
diff --git a/unittests/IR/ConstantsTest.cpp b/unittests/IR/ConstantsTest.cpp
index 0e040bc..0c7777d 100644
--- a/unittests/IR/ConstantsTest.cpp
+++ b/unittests/IR/ConstantsTest.cpp
@@ -249,7 +249,8 @@ TEST(ConstantsTest, AsInstructionsTest) {
// not a normal one!
//CHECK(ConstantExpr::getGetElementPtr(Global, V, false),
// "getelementptr i32*, i32** @dummy, i32 1");
- CHECK(ConstantExpr::getInBoundsGetElementPtr(Global, V),
+ CHECK(ConstantExpr::getInBoundsGetElementPtr(PointerType::getUnqual(Int32Ty),
+ Global, V),
"getelementptr inbounds i32*, i32** @dummy, i32 1");
CHECK(ConstantExpr::getExtractElement(P6, One), "extractelement <2 x i16> "
@@ -266,7 +267,8 @@ TEST(ConstantsTest, ReplaceWithConstantTest) {
Constant *Global =
M->getOrInsertGlobal("dummy", PointerType::getUnqual(Int32Ty));
- Constant *GEP = ConstantExpr::getGetElementPtr(Global, One);
+ Constant *GEP = ConstantExpr::getGetElementPtr(
+ PointerType::getUnqual(Int32Ty), Global, One);
EXPECT_DEATH(Global->replaceAllUsesWith(GEP),
"this->replaceAllUsesWith\\(expr\\(this\\)\\) is NOT valid!");
}
@@ -333,7 +335,7 @@ TEST(ConstantsTest, GEPReplaceWithConstant) {
auto *C1 = ConstantInt::get(IntTy, 1);
auto *Placeholder = new GlobalVariable(
*M, IntTy, false, GlobalValue::ExternalWeakLinkage, nullptr);
- auto *GEP = ConstantExpr::getGetElementPtr(Placeholder, C1);
+ auto *GEP = ConstantExpr::getGetElementPtr(IntTy, Placeholder, C1);
ASSERT_EQ(GEP->getOperand(0), Placeholder);
auto *Ref =
diff --git a/unittests/IR/DebugInfoTest.cpp b/unittests/IR/DebugInfoTest.cpp
index a957b99..534663a 100644
--- a/unittests/IR/DebugInfoTest.cpp
+++ b/unittests/IR/DebugInfoTest.cpp
@@ -7,127 +7,75 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "gtest/gtest.h"
using namespace llvm;
-namespace llvm {
-
-static void PrintTo(const StringRef &S, ::std::ostream *os) {
- *os << "(" << (const void *)S.data() << "," << S.size() << ") = '";
- for (auto C : S)
- if (C)
- *os << C;
- else
- *os << "\\00";
- *os << "'";
-}
-static void PrintTo(const DIHeaderFieldIterator &I, ::std::ostream *os) {
- PrintTo(I.getCurrent(), os);
- *os << " in ";
- PrintTo(I.getHeader(), os);
-}
-
-} // end namespace llvm
-
namespace {
-#define MAKE_FIELD_ITERATOR(S) \
- DIHeaderFieldIterator(StringRef(S, sizeof(S) - 1))
-TEST(DebugInfoTest, DIHeaderFieldIterator) {
- ASSERT_EQ(DIHeaderFieldIterator(), DIHeaderFieldIterator());
-
- ASSERT_NE(DIHeaderFieldIterator(), MAKE_FIELD_ITERATOR(""));
- ASSERT_EQ(DIHeaderFieldIterator(), ++MAKE_FIELD_ITERATOR(""));
- ASSERT_EQ("", *DIHeaderFieldIterator(""));
-
- ASSERT_NE(DIHeaderFieldIterator(), MAKE_FIELD_ITERATOR("stuff"));
- ASSERT_EQ(DIHeaderFieldIterator(), ++MAKE_FIELD_ITERATOR("stuff"));
- ASSERT_EQ("stuff", *DIHeaderFieldIterator("stuff"));
-
- ASSERT_NE(DIHeaderFieldIterator(), MAKE_FIELD_ITERATOR("st\0uff"));
- ASSERT_NE(DIHeaderFieldIterator(), ++MAKE_FIELD_ITERATOR("st\0uff"));
- ASSERT_EQ(DIHeaderFieldIterator(), ++++MAKE_FIELD_ITERATOR("st\0uff"));
- ASSERT_EQ("st", *MAKE_FIELD_ITERATOR("st\0uff"));
- ASSERT_EQ("uff", *++MAKE_FIELD_ITERATOR("st\0uff"));
-
- ASSERT_NE(DIHeaderFieldIterator(), MAKE_FIELD_ITERATOR("stuff\0"));
- ASSERT_NE(DIHeaderFieldIterator(), ++MAKE_FIELD_ITERATOR("stuff\0"));
- ASSERT_EQ(DIHeaderFieldIterator(), ++++MAKE_FIELD_ITERATOR("stuff\0"));
- ASSERT_EQ("stuff", *MAKE_FIELD_ITERATOR("stuff\0"));
- ASSERT_EQ("", *++MAKE_FIELD_ITERATOR("stuff\0"));
-
- ASSERT_NE(DIHeaderFieldIterator(), MAKE_FIELD_ITERATOR("\0stuff"));
- ASSERT_NE(DIHeaderFieldIterator(), ++MAKE_FIELD_ITERATOR("\0stuff"));
- ASSERT_EQ(DIHeaderFieldIterator(), ++++MAKE_FIELD_ITERATOR("\0stuff"));
- ASSERT_EQ("", *MAKE_FIELD_ITERATOR("\0stuff"));
- ASSERT_EQ("stuff", *++MAKE_FIELD_ITERATOR("\0stuff"));
-}
-
-TEST(DIDescriptorTest, getFlag) {
+TEST(DebugNodeTest, getFlag) {
// Some valid flags.
- EXPECT_EQ(DIDescriptor::FlagPublic, DIDescriptor::getFlag("DIFlagPublic"));
- EXPECT_EQ(DIDescriptor::FlagProtected,
- DIDescriptor::getFlag("DIFlagProtected"));
- EXPECT_EQ(DIDescriptor::FlagPrivate, DIDescriptor::getFlag("DIFlagPrivate"));
- EXPECT_EQ(DIDescriptor::FlagVector, DIDescriptor::getFlag("DIFlagVector"));
- EXPECT_EQ(DIDescriptor::FlagRValueReference,
- DIDescriptor::getFlag("DIFlagRValueReference"));
+ EXPECT_EQ(DebugNode::FlagPublic, DebugNode::getFlag("DIFlagPublic"));
+ EXPECT_EQ(DebugNode::FlagProtected, DebugNode::getFlag("DIFlagProtected"));
+ EXPECT_EQ(DebugNode::FlagPrivate, DebugNode::getFlag("DIFlagPrivate"));
+ EXPECT_EQ(DebugNode::FlagVector, DebugNode::getFlag("DIFlagVector"));
+ EXPECT_EQ(DebugNode::FlagRValueReference,
+ DebugNode::getFlag("DIFlagRValueReference"));
// FlagAccessibility shouldn't work.
- EXPECT_EQ(0u, DIDescriptor::getFlag("DIFlagAccessibility"));
+ EXPECT_EQ(0u, DebugNode::getFlag("DIFlagAccessibility"));
// Some other invalid strings.
- EXPECT_EQ(0u, DIDescriptor::getFlag("FlagVector"));
- EXPECT_EQ(0u, DIDescriptor::getFlag("Vector"));
- EXPECT_EQ(0u, DIDescriptor::getFlag("other things"));
- EXPECT_EQ(0u, DIDescriptor::getFlag("DIFlagOther"));
+ EXPECT_EQ(0u, DebugNode::getFlag("FlagVector"));
+ EXPECT_EQ(0u, DebugNode::getFlag("Vector"));
+ EXPECT_EQ(0u, DebugNode::getFlag("other things"));
+ EXPECT_EQ(0u, DebugNode::getFlag("DIFlagOther"));
}
-TEST(DIDescriptorTest, getFlagString) {
+TEST(DebugNodeTest, getFlagString) {
// Some valid flags.
EXPECT_EQ(StringRef("DIFlagPublic"),
- DIDescriptor::getFlagString(DIDescriptor::FlagPublic));
+ DebugNode::getFlagString(DebugNode::FlagPublic));
EXPECT_EQ(StringRef("DIFlagProtected"),
- DIDescriptor::getFlagString(DIDescriptor::FlagProtected));
+ DebugNode::getFlagString(DebugNode::FlagProtected));
EXPECT_EQ(StringRef("DIFlagPrivate"),
- DIDescriptor::getFlagString(DIDescriptor::FlagPrivate));
+ DebugNode::getFlagString(DebugNode::FlagPrivate));
EXPECT_EQ(StringRef("DIFlagVector"),
- DIDescriptor::getFlagString(DIDescriptor::FlagVector));
+ DebugNode::getFlagString(DebugNode::FlagVector));
EXPECT_EQ(StringRef("DIFlagRValueReference"),
- DIDescriptor::getFlagString(DIDescriptor::FlagRValueReference));
+ DebugNode::getFlagString(DebugNode::FlagRValueReference));
// FlagAccessibility actually equals FlagPublic.
EXPECT_EQ(StringRef("DIFlagPublic"),
- DIDescriptor::getFlagString(DIDescriptor::FlagAccessibility));
+ DebugNode::getFlagString(DebugNode::FlagAccessibility));
// Some other invalid flags.
- EXPECT_EQ(StringRef(), DIDescriptor::getFlagString(DIDescriptor::FlagPublic |
- DIDescriptor::FlagVector));
- EXPECT_EQ(StringRef(),
- DIDescriptor::getFlagString(DIDescriptor::FlagFwdDecl |
- DIDescriptor::FlagArtificial));
- EXPECT_EQ(StringRef(), DIDescriptor::getFlagString(0xffff));
+ EXPECT_EQ(StringRef(), DebugNode::getFlagString(DebugNode::FlagPublic |
+ DebugNode::FlagVector));
+ EXPECT_EQ(StringRef(), DebugNode::getFlagString(DebugNode::FlagFwdDecl |
+ DebugNode::FlagArtificial));
+ EXPECT_EQ(StringRef(), DebugNode::getFlagString(0xffff));
}
-TEST(DIDescriptorTest, splitFlags) {
- // Some valid flags.
+TEST(DebugNodeTest, splitFlags) {
+// Some valid flags.
#define CHECK_SPLIT(FLAGS, VECTOR, REMAINDER) \
{ \
SmallVector<unsigned, 8> V; \
- EXPECT_EQ(REMAINDER, DIDescriptor::splitFlags(FLAGS, V)); \
- EXPECT_TRUE(makeArrayRef(V).equals(VECTOR)); \
+ EXPECT_EQ(REMAINDER, DebugNode::splitFlags(FLAGS, V)); \
+ EXPECT_TRUE(makeArrayRef(V).equals(VECTOR)); \
}
- CHECK_SPLIT(DIDescriptor::FlagPublic, {DIDescriptor::FlagPublic}, 0u);
- CHECK_SPLIT(DIDescriptor::FlagProtected, {DIDescriptor::FlagProtected}, 0u);
- CHECK_SPLIT(DIDescriptor::FlagPrivate, {DIDescriptor::FlagPrivate}, 0u);
- CHECK_SPLIT(DIDescriptor::FlagVector, {DIDescriptor::FlagVector}, 0u);
- CHECK_SPLIT(DIDescriptor::FlagRValueReference, {DIDescriptor::FlagRValueReference}, 0u);
- unsigned Flags[] = {DIDescriptor::FlagFwdDecl, DIDescriptor::FlagVector};
- CHECK_SPLIT(DIDescriptor::FlagFwdDecl | DIDescriptor::FlagVector, Flags, 0u);
+ CHECK_SPLIT(DebugNode::FlagPublic, {DebugNode::FlagPublic}, 0u);
+ CHECK_SPLIT(DebugNode::FlagProtected, {DebugNode::FlagProtected}, 0u);
+ CHECK_SPLIT(DebugNode::FlagPrivate, {DebugNode::FlagPrivate}, 0u);
+ CHECK_SPLIT(DebugNode::FlagVector, {DebugNode::FlagVector}, 0u);
+ CHECK_SPLIT(DebugNode::FlagRValueReference, {DebugNode::FlagRValueReference},
+ 0u);
+ unsigned Flags[] = {DebugNode::FlagFwdDecl, DebugNode::FlagVector};
+ CHECK_SPLIT(DebugNode::FlagFwdDecl | DebugNode::FlagVector, Flags, 0u);
CHECK_SPLIT(0x100000u, {}, 0x100000u);
- CHECK_SPLIT(0x100000u | DIDescriptor::FlagVector, {DIDescriptor::FlagVector},
+ CHECK_SPLIT(0x100000u | DebugNode::FlagVector, {DebugNode::FlagVector},
0x100000u);
#undef CHECK_SPLIT
}
diff --git a/unittests/IR/DominatorTreeTest.cpp b/unittests/IR/DominatorTreeTest.cpp
index 8d2dc41..6a5838e 100644
--- a/unittests/IR/DominatorTreeTest.cpp
+++ b/unittests/IR/DominatorTreeTest.cpp
@@ -10,6 +10,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
@@ -25,7 +26,7 @@ namespace llvm {
namespace {
struct DPass : public FunctionPass {
static char ID;
- virtual bool runOnFunction(Function &F) {
+ bool runOnFunction(Function &F) override {
DominatorTree *DT =
&getAnalysis<DominatorTreeWrapperPass>().getDomTree();
PostDominatorTree *PDT = &getAnalysis<PostDominatorTree>();
@@ -174,9 +175,36 @@ namespace llvm {
EXPECT_EQ(DominatedBBs.size(), 0UL);
EXPECT_EQ(PostDominatedBBs.size(), 0UL);
+ // Check DFS Numbers before
+ EXPECT_EQ(DT->getNode(BB0)->getDFSNumIn(), 0UL);
+ EXPECT_EQ(DT->getNode(BB0)->getDFSNumOut(), 7UL);
+ EXPECT_EQ(DT->getNode(BB1)->getDFSNumIn(), 1UL);
+ EXPECT_EQ(DT->getNode(BB1)->getDFSNumOut(), 2UL);
+ EXPECT_EQ(DT->getNode(BB2)->getDFSNumIn(), 5UL);
+ EXPECT_EQ(DT->getNode(BB2)->getDFSNumOut(), 6UL);
+ EXPECT_EQ(DT->getNode(BB4)->getDFSNumIn(), 3UL);
+ EXPECT_EQ(DT->getNode(BB4)->getDFSNumOut(), 4UL);
+
+ // Reattach block 3 to block 1 and recalculate
+ BB1->getTerminator()->eraseFromParent();
+ BranchInst::Create(BB4, BB3, ConstantInt::getTrue(F.getContext()), BB1);
+ DT->recalculate(F);
+
+ // Check DFS Numbers after
+ EXPECT_EQ(DT->getNode(BB0)->getDFSNumIn(), 0UL);
+ EXPECT_EQ(DT->getNode(BB0)->getDFSNumOut(), 9UL);
+ EXPECT_EQ(DT->getNode(BB1)->getDFSNumIn(), 1UL);
+ EXPECT_EQ(DT->getNode(BB1)->getDFSNumOut(), 4UL);
+ EXPECT_EQ(DT->getNode(BB2)->getDFSNumIn(), 7UL);
+ EXPECT_EQ(DT->getNode(BB2)->getDFSNumOut(), 8UL);
+ EXPECT_EQ(DT->getNode(BB3)->getDFSNumIn(), 2UL);
+ EXPECT_EQ(DT->getNode(BB3)->getDFSNumOut(), 3UL);
+ EXPECT_EQ(DT->getNode(BB4)->getDFSNumIn(), 5UL);
+ EXPECT_EQ(DT->getNode(BB4)->getDFSNumOut(), 6UL);
+
return false;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTree>();
}
diff --git a/unittests/IR/IRBuilderTest.cpp b/unittests/IR/IRBuilderTest.cpp
index ca378a3..9fe241c 100644
--- a/unittests/IR/IRBuilderTest.cpp
+++ b/unittests/IR/IRBuilderTest.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
+#include "llvm/IR/Verifier.h"
#include "gtest/gtest.h"
using namespace llvm;
@@ -25,7 +26,7 @@ namespace {
class IRBuilderTest : public testing::Test {
protected:
- virtual void SetUp() {
+ void SetUp() override {
M.reset(new Module("MyModule", Ctx));
FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
/*isVarArg=*/false);
@@ -35,7 +36,7 @@ protected:
GlobalValue::ExternalLinkage, nullptr);
}
- virtual void TearDown() {
+ void TearDown() override {
BB = nullptr;
M.reset();
}
@@ -295,14 +296,14 @@ TEST_F(IRBuilderTest, DIBuilder) {
auto CU = DIB.createCompileUnit(dwarf::DW_LANG_Cobol74, "F.CBL", "/",
"llvm-cobol74", true, "", 0);
auto Type = DIB.createSubroutineType(File, DIB.getOrCreateTypeArray(None));
- auto SP = DIB.createFunction(CU, "foo", "", File, 1, Type,
- false, true, 1, 0, true, F);
- EXPECT_TRUE(SP.Verify());
+ DIB.createFunction(CU, "foo", "", File, 1, Type, false, true, 1, 0, true, F);
AllocaInst *I = Builder.CreateAlloca(Builder.getInt8Ty());
- auto BadScope = DIB.createLexicalBlockFile(DIDescriptor(), File, 0);
+ auto BarSP = DIB.createFunction(CU, "bar", "", File, 1, Type, false, true, 1,
+ 0, true, nullptr);
+ auto BadScope = DIB.createLexicalBlockFile(BarSP, File, 0);
I->setDebugLoc(DebugLoc::get(2, 0, BadScope));
- EXPECT_FALSE(SP.Verify());
DIB.finalize();
+ EXPECT_TRUE(verifyModule(*M));
}
diff --git a/unittests/IR/MetadataTest.cpp b/unittests/IR/MetadataTest.cpp
index 51a9e0b..8b4c5db 100644
--- a/unittests/IR/MetadataTest.cpp
+++ b/unittests/IR/MetadataTest.cpp
@@ -61,8 +61,14 @@ TEST(ContextAndReplaceableUsesTest, takeReplaceableUses) {
}
class MetadataTest : public testing::Test {
+public:
+ MetadataTest() : M("test", Context), Counter(0) {}
+
protected:
LLVMContext Context;
+ Module M;
+ int Counter;
+
MDNode *getNode() { return MDNode::get(Context, None); }
MDNode *getNode(Metadata *MD) { return MDNode::get(Context, MD); }
MDNode *getNode(Metadata *MD1, Metadata *MD2) {
@@ -70,11 +76,45 @@ protected:
return MDNode::get(Context, MDs);
}
+ MDTuple *getTuple() { return MDTuple::getDistinct(Context, None); }
+ MDSubroutineType *getSubroutineType() {
+ return MDSubroutineType::getDistinct(Context, 0, getNode(nullptr));
+ }
MDSubprogram *getSubprogram() {
return MDSubprogram::getDistinct(Context, nullptr, "", "", nullptr, 0,
nullptr, false, false, 0, nullptr, 0, 0, 0,
0);
}
+ MDScopeRef getSubprogramRef() { return getSubprogram()->getRef(); }
+ MDFile *getFile() {
+ return MDFile::getDistinct(Context, "file.c", "/path/to/dir");
+ }
+ MDTypeRef getBasicType(StringRef Name) {
+ return MDBasicType::get(Context, dwarf::DW_TAG_unspecified_type, Name)
+ ->getRef();
+ }
+ MDTypeRef getDerivedType() {
+ return MDDerivedType::getDistinct(Context, dwarf::DW_TAG_pointer_type, "",
+ nullptr, 0, nullptr,
+ getBasicType("basictype"), 1, 2, 0, 0)
+ ->getRef();
+ }
+ Constant *getConstant() {
+ return ConstantInt::get(Type::getInt32Ty(Context), Counter++);
+ }
+ ConstantAsMetadata *getConstantAsMetadata() {
+ return ConstantAsMetadata::get(getConstant());
+ }
+ MDTypeRef getCompositeType() {
+ return MDCompositeType::getDistinct(
+ Context, dwarf::DW_TAG_structure_type, "", nullptr, 0, nullptr,
+ nullptr, 32, 32, 0, 0, nullptr, 0, nullptr, nullptr, "")
+ ->getRef();
+ }
+ Function *getFunction(StringRef Name) {
+ return cast<Function>(M.getOrInsertFunction(
+ Name, FunctionType::get(Type::getVoidTy(Context), None, false)));
+ }
};
typedef MetadataTest MDStringTest;
@@ -593,6 +633,48 @@ TEST_F(MDNodeTest, replaceWithUniqued) {
}
}
+TEST_F(MDNodeTest, replaceWithUniquedResolvingOperand) {
+ // temp !{}
+ MDTuple *Op = MDTuple::getTemporary(Context, None).release();
+ EXPECT_FALSE(Op->isResolved());
+
+ // temp !{temp !{}}
+ Metadata *Ops[] = {Op};
+ MDTuple *N = MDTuple::getTemporary(Context, Ops).release();
+ EXPECT_FALSE(N->isResolved());
+
+ // temp !{temp !{}} => !{temp !{}}
+ ASSERT_EQ(N, MDNode::replaceWithUniqued(TempMDTuple(N)));
+ EXPECT_FALSE(N->isResolved());
+
+ // !{temp !{}} => !{!{}}
+ ASSERT_EQ(Op, MDNode::replaceWithUniqued(TempMDTuple(Op)));
+ EXPECT_TRUE(Op->isResolved());
+ EXPECT_TRUE(N->isResolved());
+}
+
+TEST_F(MDNodeTest, replaceWithUniquedChangingOperand) {
+ // i1* @GV
+ Type *Ty = Type::getInt1PtrTy(Context);
+ std::unique_ptr<GlobalVariable> GV(
+ new GlobalVariable(Ty, false, GlobalValue::ExternalLinkage));
+ ConstantAsMetadata *Op = ConstantAsMetadata::get(GV.get());
+
+ // temp !{i1* @GV}
+ Metadata *Ops[] = {Op};
+ MDTuple *N = MDTuple::getTemporary(Context, Ops).release();
+
+ // temp !{i1* @GV} => !{i1* @GV}
+ ASSERT_EQ(N, MDNode::replaceWithUniqued(TempMDTuple(N)));
+ ASSERT_TRUE(N->isUniqued());
+
+ // !{i1* @GV} => !{null}
+ GV.reset();
+ ASSERT_TRUE(N->isUniqued());
+ Metadata *NullOps[] = {nullptr};
+ ASSERT_EQ(N, MDTuple::get(Context, NullOps));
+}
+
TEST_F(MDNodeTest, replaceWithDistinct) {
{
auto *Empty = MDTuple::get(Context, None);
@@ -768,7 +850,7 @@ TEST_F(MDSubrangeTest, get) {
auto *N = MDSubrange::get(Context, 5, 7);
EXPECT_EQ(dwarf::DW_TAG_subrange_type, N->getTag());
EXPECT_EQ(5, N->getCount());
- EXPECT_EQ(7, N->getLo());
+ EXPECT_EQ(7, N->getLowerBound());
EXPECT_EQ(N, MDSubrange::get(Context, 5, 7));
EXPECT_EQ(MDSubrange::get(Context, 5, 0), MDSubrange::get(Context, 5));
@@ -780,7 +862,7 @@ TEST_F(MDSubrangeTest, getEmptyArray) {
auto *N = MDSubrange::get(Context, -1, 0);
EXPECT_EQ(dwarf::DW_TAG_subrange_type, N->getTag());
EXPECT_EQ(-1, N->getCount());
- EXPECT_EQ(0, N->getLo());
+ EXPECT_EQ(0, N->getLowerBound());
EXPECT_EQ(N, MDSubrange::get(Context, -1, 0));
}
@@ -865,15 +947,15 @@ TEST_F(MDTypeTest, setFlags) {
MDType *D = MDSubroutineType::getDistinct(Context, 0u, Types);
EXPECT_EQ(0u, D->getFlags());
- D->setFlags(DIDescriptor::FlagRValueReference);
- EXPECT_EQ(DIDescriptor::FlagRValueReference, D->getFlags());
+ D->setFlags(DebugNode::FlagRValueReference);
+ EXPECT_EQ(DebugNode::FlagRValueReference, D->getFlags());
D->setFlags(0u);
EXPECT_EQ(0u, D->getFlags());
TempMDType T = MDSubroutineType::getTemporary(Context, 0u, Types);
EXPECT_EQ(0u, T->getFlags());
- T->setFlags(DIDescriptor::FlagRValueReference);
- EXPECT_EQ(DIDescriptor::FlagRValueReference, T->getFlags());
+ T->setFlags(DebugNode::FlagRValueReference);
+ EXPECT_EQ(DebugNode::FlagRValueReference, T->getFlags());
T->setFlags(0u);
EXPECT_EQ(0u, T->getFlags());
}
@@ -881,10 +963,10 @@ TEST_F(MDTypeTest, setFlags) {
typedef MetadataTest MDDerivedTypeTest;
TEST_F(MDDerivedTypeTest, get) {
- Metadata *File = MDTuple::getDistinct(Context, None);
- Metadata *Scope = MDTuple::getDistinct(Context, None);
- Metadata *BaseType = MDTuple::getDistinct(Context, None);
- Metadata *ExtraData = MDTuple::getDistinct(Context, None);
+ MDFile *File = getFile();
+ MDScopeRef Scope = getSubprogramRef();
+ MDTypeRef BaseType = getBasicType("basic");
+ MDTuple *ExtraData = getTuple();
auto *N = MDDerivedType::get(Context, dwarf::DW_TAG_pointer_type, "something",
File, 1, Scope, BaseType, 2, 3, 4, 5, ExtraData);
@@ -910,17 +992,17 @@ TEST_F(MDDerivedTypeTest, get) {
File, 1, Scope, BaseType, 2, 3, 4, 5,
ExtraData));
EXPECT_NE(N, MDDerivedType::get(Context, dwarf::DW_TAG_pointer_type,
- "something", Scope, 1, Scope, BaseType, 2, 3,
- 4, 5, ExtraData));
+ "something", getFile(), 1, Scope, BaseType, 2,
+ 3, 4, 5, ExtraData));
EXPECT_NE(N, MDDerivedType::get(Context, dwarf::DW_TAG_pointer_type,
"something", File, 2, Scope, BaseType, 2, 3,
4, 5, ExtraData));
- EXPECT_NE(N,
- MDDerivedType::get(Context, dwarf::DW_TAG_pointer_type, "something",
- File, 1, File, BaseType, 2, 3, 4, 5, ExtraData));
- EXPECT_NE(N,
- MDDerivedType::get(Context, dwarf::DW_TAG_pointer_type, "something",
- File, 1, Scope, File, 2, 3, 4, 5, ExtraData));
+ EXPECT_NE(N, MDDerivedType::get(Context, dwarf::DW_TAG_pointer_type,
+ "something", File, 1, getSubprogramRef(),
+ BaseType, 2, 3, 4, 5, ExtraData));
+ EXPECT_NE(N, MDDerivedType::get(
+ Context, dwarf::DW_TAG_pointer_type, "something", File, 1,
+ Scope, getBasicType("basic2"), 2, 3, 4, 5, ExtraData));
EXPECT_NE(N, MDDerivedType::get(Context, dwarf::DW_TAG_pointer_type,
"something", File, 1, Scope, BaseType, 3, 3,
4, 5, ExtraData));
@@ -933,19 +1015,19 @@ TEST_F(MDDerivedTypeTest, get) {
EXPECT_NE(N, MDDerivedType::get(Context, dwarf::DW_TAG_pointer_type,
"something", File, 1, Scope, BaseType, 2, 3,
4, 4, ExtraData));
- EXPECT_NE(N,
- MDDerivedType::get(Context, dwarf::DW_TAG_pointer_type, "something",
- File, 1, Scope, BaseType, 2, 3, 4, 5, File));
+ EXPECT_NE(N, MDDerivedType::get(Context, dwarf::DW_TAG_pointer_type,
+ "something", File, 1, Scope, BaseType, 2, 3,
+ 4, 5, getTuple()));
TempMDDerivedType Temp = N->clone();
EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
}
TEST_F(MDDerivedTypeTest, getWithLargeValues) {
- Metadata *File = MDTuple::getDistinct(Context, None);
- Metadata *Scope = MDTuple::getDistinct(Context, None);
- Metadata *BaseType = MDTuple::getDistinct(Context, None);
- Metadata *ExtraData = MDTuple::getDistinct(Context, None);
+ MDFile *File = getFile();
+ MDScopeRef Scope = getSubprogramRef();
+ MDTypeRef BaseType = getBasicType("basic");
+ MDTuple *ExtraData = getTuple();
auto *N = MDDerivedType::get(Context, dwarf::DW_TAG_pointer_type, "something",
File, 1, Scope, BaseType, UINT64_MAX,
@@ -960,18 +1042,18 @@ typedef MetadataTest MDCompositeTypeTest;
TEST_F(MDCompositeTypeTest, get) {
unsigned Tag = dwarf::DW_TAG_structure_type;
StringRef Name = "some name";
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDFile *File = getFile();
unsigned Line = 1;
- Metadata *Scope = MDTuple::getDistinct(Context, None);
- Metadata *BaseType = MDTuple::getDistinct(Context, None);
+ MDScopeRef Scope = getSubprogramRef();
+ MDTypeRef BaseType = getCompositeType();
uint64_t SizeInBits = 2;
uint64_t AlignInBits = 3;
uint64_t OffsetInBits = 4;
unsigned Flags = 5;
- Metadata *Elements = MDTuple::getDistinct(Context, None);
+ MDTuple *Elements = getTuple();
unsigned RuntimeLang = 6;
- Metadata *VTableHolder = MDTuple::getDistinct(Context, None);
- Metadata *TemplateParams = MDTuple::getDistinct(Context, None);
+ MDTypeRef VTableHolder = getCompositeType();
+ MDTuple *TemplateParams = getTuple();
StringRef Identifier = "some id";
auto *N = MDCompositeType::get(Context, Tag, Name, File, Line, Scope,
@@ -988,10 +1070,10 @@ TEST_F(MDCompositeTypeTest, get) {
EXPECT_EQ(AlignInBits, N->getAlignInBits());
EXPECT_EQ(OffsetInBits, N->getOffsetInBits());
EXPECT_EQ(Flags, N->getFlags());
- EXPECT_EQ(Elements, N->getElements());
+ EXPECT_EQ(Elements, N->getElements().get());
EXPECT_EQ(RuntimeLang, N->getRuntimeLang());
EXPECT_EQ(VTableHolder, N->getVTableHolder());
- EXPECT_EQ(TemplateParams, N->getTemplateParams());
+ EXPECT_EQ(TemplateParams, N->getTemplateParams().get());
EXPECT_EQ(Identifier, N->getIdentifier());
EXPECT_EQ(N, MDCompositeType::get(Context, Tag, Name, File, Line, Scope,
@@ -1007,7 +1089,7 @@ TEST_F(MDCompositeTypeTest, get) {
BaseType, SizeInBits, AlignInBits,
OffsetInBits, Flags, Elements, RuntimeLang,
VTableHolder, TemplateParams, Identifier));
- EXPECT_NE(N, MDCompositeType::get(Context, Tag, Name, Scope, Line, Scope,
+ EXPECT_NE(N, MDCompositeType::get(Context, Tag, Name, getFile(), Line, Scope,
BaseType, SizeInBits, AlignInBits,
OffsetInBits, Flags, Elements, RuntimeLang,
VTableHolder, TemplateParams, Identifier));
@@ -1015,14 +1097,14 @@ TEST_F(MDCompositeTypeTest, get) {
BaseType, SizeInBits, AlignInBits,
OffsetInBits, Flags, Elements, RuntimeLang,
VTableHolder, TemplateParams, Identifier));
- EXPECT_NE(N, MDCompositeType::get(Context, Tag, Name, File, Line, File,
- BaseType, SizeInBits, AlignInBits,
- OffsetInBits, Flags, Elements, RuntimeLang,
- VTableHolder, TemplateParams, Identifier));
- EXPECT_NE(N, MDCompositeType::get(Context, Tag, Name, File, Line, Scope, File,
- SizeInBits, AlignInBits, OffsetInBits,
- Flags, Elements, RuntimeLang, VTableHolder,
- TemplateParams, Identifier));
+ EXPECT_NE(N, MDCompositeType::get(
+ Context, Tag, Name, File, Line, getSubprogramRef(), BaseType,
+ SizeInBits, AlignInBits, OffsetInBits, Flags, Elements,
+ RuntimeLang, VTableHolder, TemplateParams, Identifier));
+ EXPECT_NE(N, MDCompositeType::get(
+ Context, Tag, Name, File, Line, Scope, getBasicType("other"),
+ SizeInBits, AlignInBits, OffsetInBits, Flags, Elements,
+ RuntimeLang, VTableHolder, TemplateParams, Identifier));
EXPECT_NE(N, MDCompositeType::get(Context, Tag, Name, File, Line, Scope,
BaseType, SizeInBits + 1, AlignInBits,
OffsetInBits, Flags, Elements, RuntimeLang,
@@ -1039,22 +1121,22 @@ TEST_F(MDCompositeTypeTest, get) {
Context, Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags + 1, Elements, RuntimeLang,
VTableHolder, TemplateParams, Identifier));
- EXPECT_NE(N, MDCompositeType::get(Context, Tag, Name, File, Line, Scope,
- BaseType, SizeInBits, AlignInBits,
- OffsetInBits, Flags, File, RuntimeLang,
- VTableHolder, TemplateParams, Identifier));
+ EXPECT_NE(N, MDCompositeType::get(
+ Context, Tag, Name, File, Line, Scope, BaseType, SizeInBits,
+ AlignInBits, OffsetInBits, Flags, getTuple(), RuntimeLang,
+ VTableHolder, TemplateParams, Identifier));
EXPECT_NE(N, MDCompositeType::get(
Context, Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang + 1,
VTableHolder, TemplateParams, Identifier));
+ EXPECT_NE(N, MDCompositeType::get(
+ Context, Tag, Name, File, Line, Scope, BaseType, SizeInBits,
+ AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
+ getCompositeType(), TemplateParams, Identifier));
EXPECT_NE(N, MDCompositeType::get(Context, Tag, Name, File, Line, Scope,
BaseType, SizeInBits, AlignInBits,
OffsetInBits, Flags, Elements, RuntimeLang,
- File, TemplateParams, Identifier));
- EXPECT_NE(N, MDCompositeType::get(Context, Tag, Name, File, Line, Scope,
- BaseType, SizeInBits, AlignInBits,
- OffsetInBits, Flags, Elements, RuntimeLang,
- VTableHolder, File, Identifier));
+ VTableHolder, getTuple(), Identifier));
EXPECT_NE(N, MDCompositeType::get(Context, Tag, Name, File, Line, Scope,
BaseType, SizeInBits, AlignInBits,
OffsetInBits, Flags, Elements, RuntimeLang,
@@ -1077,18 +1159,18 @@ TEST_F(MDCompositeTypeTest, get) {
TEST_F(MDCompositeTypeTest, getWithLargeValues) {
unsigned Tag = dwarf::DW_TAG_structure_type;
StringRef Name = "some name";
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDFile *File = getFile();
unsigned Line = 1;
- Metadata *Scope = MDTuple::getDistinct(Context, None);
- Metadata *BaseType = MDTuple::getDistinct(Context, None);
+ MDScopeRef Scope = getSubprogramRef();
+ MDTypeRef BaseType = getCompositeType();
uint64_t SizeInBits = UINT64_MAX;
uint64_t AlignInBits = UINT64_MAX - 1;
uint64_t OffsetInBits = UINT64_MAX - 2;
unsigned Flags = 5;
- Metadata *Elements = MDTuple::getDistinct(Context, None);
+ MDTuple *Elements = getTuple();
unsigned RuntimeLang = 6;
- Metadata *VTableHolder = MDTuple::getDistinct(Context, None);
- Metadata *TemplateParams = MDTuple::getDistinct(Context, None);
+ MDTypeRef VTableHolder = getCompositeType();
+ MDTuple *TemplateParams = getTuple();
StringRef Identifier = "some id";
auto *N = MDCompositeType::get(Context, Tag, Name, File, Line, Scope,
@@ -1103,10 +1185,10 @@ TEST_F(MDCompositeTypeTest, getWithLargeValues) {
TEST_F(MDCompositeTypeTest, replaceOperands) {
unsigned Tag = dwarf::DW_TAG_structure_type;
StringRef Name = "some name";
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDFile *File = getFile();
unsigned Line = 1;
- Metadata *Scope = MDTuple::getDistinct(Context, None);
- Metadata *BaseType = MDTuple::getDistinct(Context, None);
+ MDScopeRef Scope = getSubprogramRef();
+ MDTypeRef BaseType = getCompositeType();
uint64_t SizeInBits = 2;
uint64_t AlignInBits = 3;
uint64_t OffsetInBits = 4;
@@ -1120,13 +1202,13 @@ TEST_F(MDCompositeTypeTest, replaceOperands) {
nullptr, nullptr, Identifier);
auto *Elements = MDTuple::getDistinct(Context, None);
- EXPECT_EQ(nullptr, N->getElements());
+ EXPECT_EQ(nullptr, N->getElements().get());
N->replaceElements(Elements);
- EXPECT_EQ(Elements, N->getElements());
+ EXPECT_EQ(Elements, N->getElements().get());
N->replaceElements(nullptr);
- EXPECT_EQ(nullptr, N->getElements());
+ EXPECT_EQ(nullptr, N->getElements().get());
- auto *VTableHolder = MDTuple::getDistinct(Context, None);
+ MDTypeRef VTableHolder = getCompositeType();
EXPECT_EQ(nullptr, N->getVTableHolder());
N->replaceVTableHolder(VTableHolder);
EXPECT_EQ(VTableHolder, N->getVTableHolder());
@@ -1134,28 +1216,27 @@ TEST_F(MDCompositeTypeTest, replaceOperands) {
EXPECT_EQ(nullptr, N->getVTableHolder());
auto *TemplateParams = MDTuple::getDistinct(Context, None);
- EXPECT_EQ(nullptr, N->getTemplateParams());
+ EXPECT_EQ(nullptr, N->getTemplateParams().get());
N->replaceTemplateParams(TemplateParams);
- EXPECT_EQ(TemplateParams, N->getTemplateParams());
+ EXPECT_EQ(TemplateParams, N->getTemplateParams().get());
N->replaceTemplateParams(nullptr);
- EXPECT_EQ(nullptr, N->getTemplateParams());
+ EXPECT_EQ(nullptr, N->getTemplateParams().get());
}
typedef MetadataTest MDSubroutineTypeTest;
TEST_F(MDSubroutineTypeTest, get) {
unsigned Flags = 1;
- Metadata *TypeArray = MDTuple::getDistinct(Context, None);
+ MDTuple *TypeArray = getTuple();
auto *N = MDSubroutineType::get(Context, Flags, TypeArray);
EXPECT_EQ(dwarf::DW_TAG_subroutine_type, N->getTag());
EXPECT_EQ(Flags, N->getFlags());
- EXPECT_EQ(TypeArray, N->getTypeArray());
+ EXPECT_EQ(TypeArray, N->getTypeArray().get());
EXPECT_EQ(N, MDSubroutineType::get(Context, Flags, TypeArray));
EXPECT_NE(N, MDSubroutineType::get(Context, Flags + 1, TypeArray));
- EXPECT_NE(N, MDSubroutineType::get(Context, Flags,
- MDTuple::getDistinct(Context, None)));
+ EXPECT_NE(N, MDSubroutineType::get(Context, Flags, getTuple()));
TempMDSubroutineType Temp = N->clone();
EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
@@ -1166,7 +1247,7 @@ TEST_F(MDSubroutineTypeTest, get) {
EXPECT_EQ("", N->getName());
EXPECT_EQ(nullptr, N->getBaseType());
EXPECT_EQ(nullptr, N->getVTableHolder());
- EXPECT_EQ(nullptr, N->getTemplateParams());
+ EXPECT_EQ(nullptr, N->getTemplateParams().get());
EXPECT_EQ("", N->getIdentifier());
}
@@ -1199,18 +1280,18 @@ typedef MetadataTest MDCompileUnitTest;
TEST_F(MDCompileUnitTest, get) {
unsigned SourceLanguage = 1;
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDFile *File = getFile();
StringRef Producer = "some producer";
bool IsOptimized = false;
StringRef Flags = "flag after flag";
unsigned RuntimeVersion = 2;
StringRef SplitDebugFilename = "another/file";
unsigned EmissionKind = 3;
- Metadata *EnumTypes = MDTuple::getDistinct(Context, None);
- Metadata *RetainedTypes = MDTuple::getDistinct(Context, None);
- Metadata *Subprograms = MDTuple::getDistinct(Context, None);
- Metadata *GlobalVariables = MDTuple::getDistinct(Context, None);
- Metadata *ImportedEntities = MDTuple::getDistinct(Context, None);
+ MDTuple *EnumTypes = getTuple();
+ MDTuple *RetainedTypes = getTuple();
+ MDTuple *Subprograms = getTuple();
+ MDTuple *GlobalVariables = getTuple();
+ MDTuple *ImportedEntities = getTuple();
auto *N = MDCompileUnit::get(
Context, SourceLanguage, File, Producer, IsOptimized, Flags,
RuntimeVersion, SplitDebugFilename, EmissionKind, EnumTypes,
@@ -1225,11 +1306,11 @@ TEST_F(MDCompileUnitTest, get) {
EXPECT_EQ(RuntimeVersion, N->getRuntimeVersion());
EXPECT_EQ(SplitDebugFilename, N->getSplitDebugFilename());
EXPECT_EQ(EmissionKind, N->getEmissionKind());
- EXPECT_EQ(EnumTypes, N->getEnumTypes());
- EXPECT_EQ(RetainedTypes, N->getRetainedTypes());
- EXPECT_EQ(Subprograms, N->getSubprograms());
- EXPECT_EQ(GlobalVariables, N->getGlobalVariables());
- EXPECT_EQ(ImportedEntities, N->getImportedEntities());
+ EXPECT_EQ(EnumTypes, N->getEnumTypes().get());
+ EXPECT_EQ(RetainedTypes, N->getRetainedTypes().get());
+ EXPECT_EQ(Subprograms, N->getSubprograms().get());
+ EXPECT_EQ(GlobalVariables, N->getGlobalVariables().get());
+ EXPECT_EQ(ImportedEntities, N->getImportedEntities().get());
EXPECT_EQ(N, MDCompileUnit::get(Context, SourceLanguage, File, Producer,
IsOptimized, Flags, RuntimeVersion,
SplitDebugFilename, EmissionKind, EnumTypes,
@@ -1241,7 +1322,7 @@ TEST_F(MDCompileUnitTest, get) {
SplitDebugFilename, EmissionKind, EnumTypes,
RetainedTypes, Subprograms, GlobalVariables,
ImportedEntities));
- EXPECT_NE(N, MDCompileUnit::get(Context, SourceLanguage, EnumTypes, Producer,
+ EXPECT_NE(N, MDCompileUnit::get(Context, SourceLanguage, getFile(), Producer,
IsOptimized, Flags, RuntimeVersion,
SplitDebugFilename, EmissionKind, EnumTypes,
RetainedTypes, Subprograms, GlobalVariables,
@@ -1278,25 +1359,26 @@ TEST_F(MDCompileUnitTest, get) {
GlobalVariables, ImportedEntities));
EXPECT_NE(N, MDCompileUnit::get(Context, SourceLanguage, File, Producer,
IsOptimized, Flags, RuntimeVersion,
- SplitDebugFilename, EmissionKind, File,
+ SplitDebugFilename, EmissionKind, getTuple(),
RetainedTypes, Subprograms, GlobalVariables,
ImportedEntities));
EXPECT_NE(N, MDCompileUnit::get(
Context, SourceLanguage, File, Producer, IsOptimized, Flags,
RuntimeVersion, SplitDebugFilename, EmissionKind, EnumTypes,
- File, Subprograms, GlobalVariables, ImportedEntities));
- EXPECT_NE(N, MDCompileUnit::get(
- Context, SourceLanguage, File, Producer, IsOptimized, Flags,
- RuntimeVersion, SplitDebugFilename, EmissionKind, EnumTypes,
- RetainedTypes, File, GlobalVariables, ImportedEntities));
+ getTuple(), Subprograms, GlobalVariables, ImportedEntities));
+ EXPECT_NE(N, MDCompileUnit::get(Context, SourceLanguage, File, Producer,
+ IsOptimized, Flags, RuntimeVersion,
+ SplitDebugFilename, EmissionKind, EnumTypes,
+ RetainedTypes, getTuple(), GlobalVariables,
+ ImportedEntities));
EXPECT_NE(N, MDCompileUnit::get(
Context, SourceLanguage, File, Producer, IsOptimized, Flags,
RuntimeVersion, SplitDebugFilename, EmissionKind, EnumTypes,
- RetainedTypes, Subprograms, File, ImportedEntities));
+ RetainedTypes, Subprograms, getTuple(), ImportedEntities));
EXPECT_NE(N, MDCompileUnit::get(
Context, SourceLanguage, File, Producer, IsOptimized, Flags,
RuntimeVersion, SplitDebugFilename, EmissionKind, EnumTypes,
- RetainedTypes, Subprograms, GlobalVariables, File));
+ RetainedTypes, Subprograms, GlobalVariables, getTuple()));
TempMDCompileUnit Temp = N->clone();
EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
@@ -1304,57 +1386,57 @@ TEST_F(MDCompileUnitTest, get) {
TEST_F(MDCompileUnitTest, replaceArrays) {
unsigned SourceLanguage = 1;
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDFile *File = getFile();
StringRef Producer = "some producer";
bool IsOptimized = false;
StringRef Flags = "flag after flag";
unsigned RuntimeVersion = 2;
StringRef SplitDebugFilename = "another/file";
unsigned EmissionKind = 3;
- Metadata *EnumTypes = MDTuple::getDistinct(Context, None);
- Metadata *RetainedTypes = MDTuple::getDistinct(Context, None);
- Metadata *ImportedEntities = MDTuple::getDistinct(Context, None);
+ MDTuple *EnumTypes = MDTuple::getDistinct(Context, None);
+ MDTuple *RetainedTypes = MDTuple::getDistinct(Context, None);
+ MDTuple *ImportedEntities = MDTuple::getDistinct(Context, None);
auto *N = MDCompileUnit::get(
Context, SourceLanguage, File, Producer, IsOptimized, Flags,
RuntimeVersion, SplitDebugFilename, EmissionKind, EnumTypes,
RetainedTypes, nullptr, nullptr, ImportedEntities);
auto *Subprograms = MDTuple::getDistinct(Context, None);
- EXPECT_EQ(nullptr, N->getSubprograms());
+ EXPECT_EQ(nullptr, N->getSubprograms().get());
N->replaceSubprograms(Subprograms);
- EXPECT_EQ(Subprograms, N->getSubprograms());
+ EXPECT_EQ(Subprograms, N->getSubprograms().get());
N->replaceSubprograms(nullptr);
- EXPECT_EQ(nullptr, N->getSubprograms());
+ EXPECT_EQ(nullptr, N->getSubprograms().get());
auto *GlobalVariables = MDTuple::getDistinct(Context, None);
- EXPECT_EQ(nullptr, N->getGlobalVariables());
+ EXPECT_EQ(nullptr, N->getGlobalVariables().get());
N->replaceGlobalVariables(GlobalVariables);
- EXPECT_EQ(GlobalVariables, N->getGlobalVariables());
+ EXPECT_EQ(GlobalVariables, N->getGlobalVariables().get());
N->replaceGlobalVariables(nullptr);
- EXPECT_EQ(nullptr, N->getGlobalVariables());
+ EXPECT_EQ(nullptr, N->getGlobalVariables().get());
}
typedef MetadataTest MDSubprogramTest;
TEST_F(MDSubprogramTest, get) {
- Metadata *Scope = MDTuple::getDistinct(Context, None);
+ MDScopeRef Scope = getCompositeType();
StringRef Name = "name";
StringRef LinkageName = "linkage";
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDFile *File = getFile();
unsigned Line = 2;
- Metadata *Type = MDTuple::getDistinct(Context, None);
+ MDSubroutineType *Type = getSubroutineType();
bool IsLocalToUnit = false;
bool IsDefinition = true;
unsigned ScopeLine = 3;
- Metadata *ContainingType = MDTuple::getDistinct(Context, None);
+ MDTypeRef ContainingType = getCompositeType();
unsigned Virtuality = 4;
unsigned VirtualIndex = 5;
unsigned Flags = 6;
bool IsOptimized = false;
- Metadata *Function = MDTuple::getDistinct(Context, None);
- Metadata *TemplateParams = MDTuple::getDistinct(Context, None);
- Metadata *Declaration = MDTuple::getDistinct(Context, None);
- Metadata *Variables = MDTuple::getDistinct(Context, None);
+ llvm::Function *Function = getFunction("foo");
+ MDTuple *TemplateParams = getTuple();
+ MDSubprogram *Declaration = getSubprogram();
+ MDTuple *Variables = getTuple();
auto *N = MDSubprogram::get(
Context, Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
@@ -1377,20 +1459,20 @@ TEST_F(MDSubprogramTest, get) {
EXPECT_EQ(Flags, N->getFlags());
EXPECT_EQ(IsOptimized, N->isOptimized());
EXPECT_EQ(Function, N->getFunction());
- EXPECT_EQ(TemplateParams, N->getTemplateParams());
+ EXPECT_EQ(TemplateParams, N->getTemplateParams().get());
EXPECT_EQ(Declaration, N->getDeclaration());
- EXPECT_EQ(Variables, N->getVariables());
+ EXPECT_EQ(Variables, N->getVariables().get());
EXPECT_EQ(N, MDSubprogram::get(Context, Scope, Name, LinkageName, File, Line,
Type, IsLocalToUnit, IsDefinition, ScopeLine,
ContainingType, Virtuality, VirtualIndex,
Flags, IsOptimized, Function, TemplateParams,
Declaration, Variables));
- EXPECT_NE(N, MDSubprogram::get(Context, File, Name, LinkageName, File, Line,
- Type, IsLocalToUnit, IsDefinition, ScopeLine,
- ContainingType, Virtuality, VirtualIndex,
- Flags, IsOptimized, Function, TemplateParams,
- Declaration, Variables));
+ EXPECT_NE(N, MDSubprogram::get(Context, getCompositeType(), Name, LinkageName,
+ File, Line, Type, IsLocalToUnit, IsDefinition,
+ ScopeLine, ContainingType, Virtuality,
+ VirtualIndex, Flags, IsOptimized, Function,
+ TemplateParams, Declaration, Variables));
EXPECT_NE(N, MDSubprogram::get(Context, Scope, "other", LinkageName, File,
Line, Type, IsLocalToUnit, IsDefinition,
ScopeLine, ContainingType, Virtuality,
@@ -1401,21 +1483,21 @@ TEST_F(MDSubprogramTest, get) {
ContainingType, Virtuality, VirtualIndex,
Flags, IsOptimized, Function, TemplateParams,
Declaration, Variables));
- EXPECT_NE(N, MDSubprogram::get(Context, Scope, Name, LinkageName, Scope, Line,
- Type, IsLocalToUnit, IsDefinition, ScopeLine,
- ContainingType, Virtuality, VirtualIndex,
- Flags, IsOptimized, Function, TemplateParams,
- Declaration, Variables));
+ EXPECT_NE(N, MDSubprogram::get(Context, Scope, Name, LinkageName, getFile(),
+ Line, Type, IsLocalToUnit, IsDefinition,
+ ScopeLine, ContainingType, Virtuality,
+ VirtualIndex, Flags, IsOptimized, Function,
+ TemplateParams, Declaration, Variables));
EXPECT_NE(N, MDSubprogram::get(Context, Scope, Name, LinkageName, File,
Line + 1, Type, IsLocalToUnit, IsDefinition,
ScopeLine, ContainingType, Virtuality,
VirtualIndex, Flags, IsOptimized, Function,
TemplateParams, Declaration, Variables));
- EXPECT_NE(N, MDSubprogram::get(Context, Scope, Name, LinkageName, File, Line,
- Scope, IsLocalToUnit, IsDefinition, ScopeLine,
- ContainingType, Virtuality, VirtualIndex,
- Flags, IsOptimized, Function, TemplateParams,
- Declaration, Variables));
+ EXPECT_NE(N, MDSubprogram::get(
+ Context, Scope, Name, LinkageName, File, Line,
+ getSubroutineType(), IsLocalToUnit, IsDefinition, ScopeLine,
+ ContainingType, Virtuality, VirtualIndex, Flags, IsOptimized,
+ Function, TemplateParams, Declaration, Variables));
EXPECT_NE(N, MDSubprogram::get(Context, Scope, Name, LinkageName, File, Line,
Type, !IsLocalToUnit, IsDefinition, ScopeLine,
ContainingType, Virtuality, VirtualIndex,
@@ -1433,8 +1515,8 @@ TEST_F(MDSubprogramTest, get) {
TemplateParams, Declaration, Variables));
EXPECT_NE(N, MDSubprogram::get(Context, Scope, Name, LinkageName, File, Line,
Type, IsLocalToUnit, IsDefinition, ScopeLine,
- Type, Virtuality, VirtualIndex, Flags,
- IsOptimized, Function, TemplateParams,
+ getCompositeType(), Virtuality, VirtualIndex,
+ Flags, IsOptimized, Function, TemplateParams,
Declaration, Variables));
EXPECT_NE(N, MDSubprogram::get(Context, Scope, Name, LinkageName, File, Line,
Type, IsLocalToUnit, IsDefinition, ScopeLine,
@@ -1459,46 +1541,46 @@ TEST_F(MDSubprogramTest, get) {
EXPECT_NE(N, MDSubprogram::get(Context, Scope, Name, LinkageName, File, Line,
Type, IsLocalToUnit, IsDefinition, ScopeLine,
ContainingType, Virtuality, VirtualIndex,
- Flags, IsOptimized, Type, TemplateParams,
- Declaration, Variables));
+ Flags, IsOptimized, getFunction("bar"),
+ TemplateParams, Declaration, Variables));
EXPECT_NE(N, MDSubprogram::get(Context, Scope, Name, LinkageName, File, Line,
Type, IsLocalToUnit, IsDefinition, ScopeLine,
ContainingType, Virtuality, VirtualIndex,
- Flags, IsOptimized, Function, Type,
+ Flags, IsOptimized, Function, getTuple(),
Declaration, Variables));
EXPECT_NE(N, MDSubprogram::get(Context, Scope, Name, LinkageName, File, Line,
Type, IsLocalToUnit, IsDefinition, ScopeLine,
ContainingType, Virtuality, VirtualIndex,
Flags, IsOptimized, Function, TemplateParams,
- Type, Variables));
+ getSubprogram(), Variables));
EXPECT_NE(N, MDSubprogram::get(Context, Scope, Name, LinkageName, File, Line,
Type, IsLocalToUnit, IsDefinition, ScopeLine,
ContainingType, Virtuality, VirtualIndex,
Flags, IsOptimized, Function, TemplateParams,
- Declaration, Type));
+ Declaration, getTuple()));
TempMDSubprogram Temp = N->clone();
EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
}
TEST_F(MDSubprogramTest, replaceFunction) {
- Metadata *Scope = MDTuple::getDistinct(Context, None);
+ MDScopeRef Scope = getCompositeType();
StringRef Name = "name";
StringRef LinkageName = "linkage";
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDFile *File = getFile();
unsigned Line = 2;
- Metadata *Type = MDTuple::getDistinct(Context, None);
+ MDSubroutineType *Type = getSubroutineType();
bool IsLocalToUnit = false;
bool IsDefinition = true;
unsigned ScopeLine = 3;
- Metadata *ContainingType = MDTuple::getDistinct(Context, None);
+ MDTypeRef ContainingType = getCompositeType();
unsigned Virtuality = 4;
unsigned VirtualIndex = 5;
unsigned Flags = 6;
bool IsOptimized = false;
- Metadata *TemplateParams = MDTuple::getDistinct(Context, None);
- Metadata *Declaration = MDTuple::getDistinct(Context, None);
- Metadata *Variables = MDTuple::getDistinct(Context, None);
+ MDTuple *TemplateParams = getTuple();
+ MDSubprogram *Declaration = getSubprogram();
+ MDTuple *Variables = getTuple();
auto *N = MDSubprogram::get(
Context, Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
@@ -1511,7 +1593,7 @@ TEST_F(MDSubprogramTest, replaceFunction) {
Function::Create(FunctionType::get(Type::getVoidTy(Context), false),
GlobalValue::ExternalLinkage));
N->replaceFunction(F.get());
- EXPECT_EQ(ConstantAsMetadata::get(F.get()), N->getFunction());
+ EXPECT_EQ(F.get(), N->getFunction());
N->replaceFunction(nullptr);
EXPECT_EQ(nullptr, N->getFunction());
@@ -1520,8 +1602,8 @@ TEST_F(MDSubprogramTest, replaceFunction) {
typedef MetadataTest MDLexicalBlockTest;
TEST_F(MDLexicalBlockTest, get) {
- Metadata *Scope = MDTuple::getDistinct(Context, None);
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDLocalScope *Scope = getSubprogram();
+ MDFile *File = getFile();
unsigned Line = 5;
unsigned Column = 8;
@@ -1534,8 +1616,9 @@ TEST_F(MDLexicalBlockTest, get) {
EXPECT_EQ(Column, N->getColumn());
EXPECT_EQ(N, MDLexicalBlock::get(Context, Scope, File, Line, Column));
- EXPECT_NE(N, MDLexicalBlock::get(Context, File, File, Line, Column));
- EXPECT_NE(N, MDLexicalBlock::get(Context, Scope, Scope, Line, Column));
+ EXPECT_NE(N,
+ MDLexicalBlock::get(Context, getSubprogram(), File, Line, Column));
+ EXPECT_NE(N, MDLexicalBlock::get(Context, Scope, getFile(), Line, Column));
EXPECT_NE(N, MDLexicalBlock::get(Context, Scope, File, Line + 1, Column));
EXPECT_NE(N, MDLexicalBlock::get(Context, Scope, File, Line, Column + 1));
@@ -1546,8 +1629,8 @@ TEST_F(MDLexicalBlockTest, get) {
typedef MetadataTest MDLexicalBlockFileTest;
TEST_F(MDLexicalBlockFileTest, get) {
- Metadata *Scope = MDTuple::getDistinct(Context, None);
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDLocalScope *Scope = getSubprogram();
+ MDFile *File = getFile();
unsigned Discriminator = 5;
auto *N = MDLexicalBlockFile::get(Context, Scope, File, Discriminator);
@@ -1558,8 +1641,10 @@ TEST_F(MDLexicalBlockFileTest, get) {
EXPECT_EQ(Discriminator, N->getDiscriminator());
EXPECT_EQ(N, MDLexicalBlockFile::get(Context, Scope, File, Discriminator));
- EXPECT_NE(N, MDLexicalBlockFile::get(Context, File, File, Discriminator));
- EXPECT_NE(N, MDLexicalBlockFile::get(Context, Scope, Scope, Discriminator));
+ EXPECT_NE(N, MDLexicalBlockFile::get(Context, getSubprogram(), File,
+ Discriminator));
+ EXPECT_NE(N,
+ MDLexicalBlockFile::get(Context, Scope, getFile(), Discriminator));
EXPECT_NE(N,
MDLexicalBlockFile::get(Context, Scope, File, Discriminator + 1));
@@ -1570,8 +1655,8 @@ TEST_F(MDLexicalBlockFileTest, get) {
typedef MetadataTest MDNamespaceTest;
TEST_F(MDNamespaceTest, get) {
- Metadata *Scope = MDTuple::getDistinct(Context, None);
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDScope *Scope = getFile();
+ MDFile *File = getFile();
StringRef Name = "namespace";
unsigned Line = 5;
@@ -1584,8 +1669,8 @@ TEST_F(MDNamespaceTest, get) {
EXPECT_EQ(Line, N->getLine());
EXPECT_EQ(N, MDNamespace::get(Context, Scope, File, Name, Line));
- EXPECT_NE(N, MDNamespace::get(Context, File, File, Name, Line));
- EXPECT_NE(N, MDNamespace::get(Context, Scope, Scope, Name, Line));
+ EXPECT_NE(N, MDNamespace::get(Context, getFile(), File, Name, Line));
+ EXPECT_NE(N, MDNamespace::get(Context, Scope, getFile(), Name, Line));
EXPECT_NE(N, MDNamespace::get(Context, Scope, File, "other", Line));
EXPECT_NE(N, MDNamespace::get(Context, Scope, File, Name, Line + 1));
@@ -1597,8 +1682,7 @@ typedef MetadataTest MDTemplateTypeParameterTest;
TEST_F(MDTemplateTypeParameterTest, get) {
StringRef Name = "template";
- Metadata *Type = MDTuple::getDistinct(Context, None);
- Metadata *Other = MDTuple::getDistinct(Context, None);
+ MDTypeRef Type = getBasicType("basic");
auto *N = MDTemplateTypeParameter::get(Context, Name, Type);
@@ -1608,7 +1692,8 @@ TEST_F(MDTemplateTypeParameterTest, get) {
EXPECT_EQ(N, MDTemplateTypeParameter::get(Context, Name, Type));
EXPECT_NE(N, MDTemplateTypeParameter::get(Context, "other", Type));
- EXPECT_NE(N, MDTemplateTypeParameter::get(Context, Name, Other));
+ EXPECT_NE(N,
+ MDTemplateTypeParameter::get(Context, Name, getBasicType("other")));
TempMDTemplateTypeParameter Temp = N->clone();
EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
@@ -1619,9 +1704,8 @@ typedef MetadataTest MDTemplateValueParameterTest;
TEST_F(MDTemplateValueParameterTest, get) {
unsigned Tag = dwarf::DW_TAG_template_value_parameter;
StringRef Name = "template";
- Metadata *Type = MDTuple::getDistinct(Context, None);
- Metadata *Value = MDTuple::getDistinct(Context, None);
- Metadata *Other = MDTuple::getDistinct(Context, None);
+ MDTypeRef Type = getBasicType("basic");
+ Metadata *Value = getConstantAsMetadata();
auto *N = MDTemplateValueParameter::get(Context, Tag, Name, Type, Value);
EXPECT_EQ(Tag, N->getTag());
@@ -1635,9 +1719,10 @@ TEST_F(MDTemplateValueParameterTest, get) {
Type, Value));
EXPECT_NE(N, MDTemplateValueParameter::get(Context, Tag, "other", Type,
Value));
- EXPECT_NE(N, MDTemplateValueParameter::get(Context, Tag, Name, Other,
- Value));
- EXPECT_NE(N, MDTemplateValueParameter::get(Context, Tag, Name, Type, Other));
+ EXPECT_NE(N, MDTemplateValueParameter::get(Context, Tag, Name,
+ getBasicType("other"), Value));
+ EXPECT_NE(N, MDTemplateValueParameter::get(Context, Tag, Name, Type,
+ getConstantAsMetadata()));
TempMDTemplateValueParameter Temp = N->clone();
EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
@@ -1646,16 +1731,17 @@ TEST_F(MDTemplateValueParameterTest, get) {
typedef MetadataTest MDGlobalVariableTest;
TEST_F(MDGlobalVariableTest, get) {
- Metadata *Scope = MDTuple::getDistinct(Context, None);
+ MDScope *Scope = getSubprogram();
StringRef Name = "name";
StringRef LinkageName = "linkage";
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDFile *File = getFile();
unsigned Line = 5;
- Metadata *Type = MDTuple::getDistinct(Context, None);
+ MDTypeRef Type = getDerivedType();
bool IsLocalToUnit = false;
bool IsDefinition = true;
- Metadata *Variable = MDTuple::getDistinct(Context, None);
- Metadata *StaticDataMemberDeclaration = MDTuple::getDistinct(Context, None);
+ Constant *Variable = getConstant();
+ MDDerivedType *StaticDataMemberDeclaration =
+ cast<MDDerivedType>(getDerivedType());
auto *N = MDGlobalVariable::get(Context, Scope, Name, LinkageName, File, Line,
Type, IsLocalToUnit, IsDefinition, Variable,
@@ -1675,37 +1761,42 @@ TEST_F(MDGlobalVariableTest, get) {
Line, Type, IsLocalToUnit, IsDefinition,
Variable, StaticDataMemberDeclaration));
- EXPECT_NE(N, MDGlobalVariable::get(Context, File, Name, LinkageName, File,
- Line, Type, IsLocalToUnit, IsDefinition,
- Variable, StaticDataMemberDeclaration));
+ EXPECT_NE(N,
+ MDGlobalVariable::get(Context, getSubprogram(), Name, LinkageName,
+ File, Line, Type, IsLocalToUnit, IsDefinition,
+ Variable, StaticDataMemberDeclaration));
EXPECT_NE(N, MDGlobalVariable::get(Context, Scope, "other", LinkageName, File,
Line, Type, IsLocalToUnit, IsDefinition,
Variable, StaticDataMemberDeclaration));
EXPECT_NE(N, MDGlobalVariable::get(Context, Scope, Name, "other", File, Line,
Type, IsLocalToUnit, IsDefinition,
Variable, StaticDataMemberDeclaration));
- EXPECT_NE(N, MDGlobalVariable::get(Context, Scope, Name, LinkageName, Scope,
- Line, Type, IsLocalToUnit, IsDefinition,
- Variable, StaticDataMemberDeclaration));
+ EXPECT_NE(N,
+ MDGlobalVariable::get(Context, Scope, Name, LinkageName, getFile(),
+ Line, Type, IsLocalToUnit, IsDefinition,
+ Variable, StaticDataMemberDeclaration));
EXPECT_NE(N,
MDGlobalVariable::get(Context, Scope, Name, LinkageName, File,
Line + 1, Type, IsLocalToUnit, IsDefinition,
Variable, StaticDataMemberDeclaration));
- EXPECT_NE(N, MDGlobalVariable::get(Context, Scope, Name, LinkageName, File,
- Line, Scope, IsLocalToUnit, IsDefinition,
- Variable, StaticDataMemberDeclaration));
+ EXPECT_NE(N,
+ MDGlobalVariable::get(Context, Scope, Name, LinkageName, File, Line,
+ getDerivedType(), IsLocalToUnit, IsDefinition,
+ Variable, StaticDataMemberDeclaration));
EXPECT_NE(N, MDGlobalVariable::get(Context, Scope, Name, LinkageName, File,
Line, Type, !IsLocalToUnit, IsDefinition,
Variable, StaticDataMemberDeclaration));
EXPECT_NE(N, MDGlobalVariable::get(Context, Scope, Name, LinkageName, File,
Line, Type, IsLocalToUnit, !IsDefinition,
Variable, StaticDataMemberDeclaration));
- EXPECT_NE(N, MDGlobalVariable::get(Context, Scope, Name, LinkageName, File,
- Line, Type, IsLocalToUnit, IsDefinition,
- Type, StaticDataMemberDeclaration));
- EXPECT_NE(N, MDGlobalVariable::get(Context, Scope, Name, LinkageName, File,
- Line, Type, IsLocalToUnit, IsDefinition,
- Variable, Type));
+ EXPECT_NE(N,
+ MDGlobalVariable::get(Context, Scope, Name, LinkageName, File, Line,
+ Type, IsLocalToUnit, IsDefinition,
+ getConstant(), StaticDataMemberDeclaration));
+ EXPECT_NE(N,
+ MDGlobalVariable::get(Context, Scope, Name, LinkageName, File, Line,
+ Type, IsLocalToUnit, IsDefinition, Variable,
+ cast<MDDerivedType>(getDerivedType())));
TempMDGlobalVariable Temp = N->clone();
EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
@@ -1715,19 +1806,16 @@ typedef MetadataTest MDLocalVariableTest;
TEST_F(MDLocalVariableTest, get) {
unsigned Tag = dwarf::DW_TAG_arg_variable;
- Metadata *Scope = MDTuple::getDistinct(Context, None);
+ MDLocalScope *Scope = getSubprogram();
StringRef Name = "name";
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDFile *File = getFile();
unsigned Line = 5;
- Metadata *Type = MDTuple::getDistinct(Context, None);
+ MDTypeRef Type = getDerivedType();
unsigned Arg = 6;
unsigned Flags = 7;
- Metadata *InlinedAtScope = MDTuple::getDistinct(Context, None);
- Metadata *InlinedAt =
- MDLocation::getDistinct(Context, 10, 20, InlinedAtScope);
auto *N = MDLocalVariable::get(Context, Tag, Scope, Name, File, Line, Type,
- Arg, Flags, InlinedAt);
+ Arg, Flags);
EXPECT_EQ(Tag, N->getTag());
EXPECT_EQ(Scope, N->getScope());
EXPECT_EQ(Name, N->getName());
@@ -1736,45 +1824,28 @@ TEST_F(MDLocalVariableTest, get) {
EXPECT_EQ(Type, N->getType());
EXPECT_EQ(Arg, N->getArg());
EXPECT_EQ(Flags, N->getFlags());
- EXPECT_EQ(InlinedAt, N->getInlinedAt());
EXPECT_EQ(N, MDLocalVariable::get(Context, Tag, Scope, Name, File, Line, Type,
- Arg, Flags, InlinedAt));
+ Arg, Flags));
EXPECT_NE(N, MDLocalVariable::get(Context, dwarf::DW_TAG_auto_variable, Scope,
- Name, File, Line, Type, Arg, Flags,
- InlinedAt));
- EXPECT_NE(N, MDLocalVariable::get(Context, Tag, File, Name, File, Line,
- Type, Arg, Flags, InlinedAt));
+ Name, File, Line, Type, Arg, Flags));
+ EXPECT_NE(N, MDLocalVariable::get(Context, Tag, getSubprogram(), Name, File,
+ Line, Type, Arg, Flags));
EXPECT_NE(N, MDLocalVariable::get(Context, Tag, Scope, "other", File, Line,
- Type, Arg, Flags, InlinedAt));
- EXPECT_NE(N, MDLocalVariable::get(Context, Tag, Scope, Name, Scope, Line,
- Type, Arg, Flags, InlinedAt));
+ Type, Arg, Flags));
+ EXPECT_NE(N, MDLocalVariable::get(Context, Tag, Scope, Name, getFile(), Line,
+ Type, Arg, Flags));
EXPECT_NE(N, MDLocalVariable::get(Context, Tag, Scope, Name, File, Line + 1,
- Type, Arg, Flags, InlinedAt));
+ Type, Arg, Flags));
EXPECT_NE(N, MDLocalVariable::get(Context, Tag, Scope, Name, File, Line,
- Scope, Arg, Flags, InlinedAt));
- EXPECT_NE(N, MDLocalVariable::get(Context, Tag, Scope, Name, File, Line, Type,
- Arg + 1, Flags, InlinedAt));
+ getDerivedType(), Arg, Flags));
EXPECT_NE(N, MDLocalVariable::get(Context, Tag, Scope, Name, File, Line, Type,
- Arg, ~Flags, InlinedAt));
+ Arg + 1, Flags));
EXPECT_NE(N, MDLocalVariable::get(Context, Tag, Scope, Name, File, Line, Type,
- Arg, Flags, Scope));
+ Arg, ~Flags));
TempMDLocalVariable Temp = N->clone();
EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
-
- auto *Inlined = N->withoutInline();
- EXPECT_NE(N, Inlined);
- EXPECT_EQ(N->getTag(), Inlined->getTag());
- EXPECT_EQ(N->getScope(), Inlined->getScope());
- EXPECT_EQ(N->getName(), Inlined->getName());
- EXPECT_EQ(N->getFile(), Inlined->getFile());
- EXPECT_EQ(N->getLine(), Inlined->getLine());
- EXPECT_EQ(N->getType(), Inlined->getType());
- EXPECT_EQ(N->getArg(), Inlined->getArg());
- EXPECT_EQ(N->getFlags(), Inlined->getFlags());
- EXPECT_EQ(nullptr, Inlined->getInlinedAt());
- EXPECT_EQ(N, Inlined->withInline(cast<MDLocation>(InlinedAt)));
}
typedef MetadataTest MDExpressionTest;
@@ -1836,12 +1907,12 @@ typedef MetadataTest MDObjCPropertyTest;
TEST_F(MDObjCPropertyTest, get) {
StringRef Name = "name";
- Metadata *File = MDTuple::getDistinct(Context, None);
+ MDFile *File = getFile();
unsigned Line = 5;
StringRef GetterName = "getter";
StringRef SetterName = "setter";
unsigned Attributes = 7;
- Metadata *Type = MDTuple::getDistinct(Context, None);
+ MDType *Type = cast<MDBasicType>(getBasicType("basic"));
auto *N = MDObjCProperty::get(Context, Name, File, Line, GetterName,
SetterName, Attributes, Type);
@@ -1859,7 +1930,7 @@ TEST_F(MDObjCPropertyTest, get) {
EXPECT_NE(N, MDObjCProperty::get(Context, "other", File, Line, GetterName,
SetterName, Attributes, Type));
- EXPECT_NE(N, MDObjCProperty::get(Context, Name, Type, Line, GetterName,
+ EXPECT_NE(N, MDObjCProperty::get(Context, Name, getFile(), Line, GetterName,
SetterName, Attributes, Type));
EXPECT_NE(N, MDObjCProperty::get(Context, Name, File, Line + 1, GetterName,
SetterName, Attributes, Type));
@@ -1870,7 +1941,8 @@ TEST_F(MDObjCPropertyTest, get) {
EXPECT_NE(N, MDObjCProperty::get(Context, Name, File, Line, GetterName,
SetterName, Attributes + 1, Type));
EXPECT_NE(N, MDObjCProperty::get(Context, Name, File, Line, GetterName,
- SetterName, Attributes, File));
+ SetterName, Attributes,
+ cast<MDBasicType>(getBasicType("other"))));
TempMDObjCProperty Temp = N->clone();
EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
@@ -1880,8 +1952,8 @@ typedef MetadataTest MDImportedEntityTest;
TEST_F(MDImportedEntityTest, get) {
unsigned Tag = dwarf::DW_TAG_imported_module;
- Metadata *Scope = MDTuple::getDistinct(Context, None);
- Metadata *Entity = MDTuple::getDistinct(Context, None);
+ MDScope *Scope = getSubprogram();
+ DebugNodeRef Entity = getCompositeType();
unsigned Line = 5;
StringRef Name = "name";
@@ -1897,8 +1969,10 @@ TEST_F(MDImportedEntityTest, get) {
EXPECT_NE(N,
MDImportedEntity::get(Context, dwarf::DW_TAG_imported_declaration,
Scope, Entity, Line, Name));
- EXPECT_NE(N, MDImportedEntity::get(Context, Tag, Entity, Entity, Line, Name));
- EXPECT_NE(N, MDImportedEntity::get(Context, Tag, Scope, Scope, Line, Name));
+ EXPECT_NE(N, MDImportedEntity::get(Context, Tag, getSubprogram(), Entity,
+ Line, Name));
+ EXPECT_NE(N, MDImportedEntity::get(Context, Tag, Scope, getCompositeType(),
+ Line, Name));
EXPECT_NE(N,
MDImportedEntity::get(Context, Tag, Scope, Entity, Line + 1, Name));
EXPECT_NE(N,
diff --git a/unittests/IR/ValueHandleTest.cpp b/unittests/IR/ValueHandleTest.cpp
index 403d2bc..6000c4a 100644
--- a/unittests/IR/ValueHandleTest.cpp
+++ b/unittests/IR/ValueHandleTest.cpp
@@ -244,8 +244,11 @@ TEST_F(ValueHandle, CallbackVH_CallbackOnDeletion) {
RecordingVH(Value *V) : CallbackVH(V), DeletedCalls(0), AURWCalls(0) {}
private:
- virtual void deleted() { DeletedCalls++; CallbackVH::deleted(); }
- virtual void allUsesReplacedWith(Value *) { AURWCalls++; }
+ void deleted() override {
+ DeletedCalls++;
+ CallbackVH::deleted();
+ }
+ void allUsesReplacedWith(Value *) override { AURWCalls++; }
};
RecordingVH RVH;
@@ -268,8 +271,11 @@ TEST_F(ValueHandle, CallbackVH_CallbackOnRAUW) {
: CallbackVH(V), DeletedCalls(0), AURWArgument(nullptr) {}
private:
- virtual void deleted() { DeletedCalls++; CallbackVH::deleted(); }
- virtual void allUsesReplacedWith(Value *new_value) {
+ void deleted() override {
+ DeletedCalls++;
+ CallbackVH::deleted();
+ }
+ void allUsesReplacedWith(Value *new_value) override {
EXPECT_EQ(nullptr, AURWArgument);
AURWArgument = new_value;
}
@@ -298,11 +304,11 @@ TEST_F(ValueHandle, CallbackVH_DeletionCanRAUW) {
Context(&getGlobalContext()) {}
private:
- virtual void deleted() {
+ void deleted() override {
getValPtr()->replaceAllUsesWith(Constant::getNullValue(Type::getInt32Ty(getGlobalContext())));
setValPtr(nullptr);
}
- virtual void allUsesReplacedWith(Value *new_value) {
+ void allUsesReplacedWith(Value *new_value) override {
ASSERT_TRUE(nullptr != getValPtr());
EXPECT_EQ(1U, getValPtr()->getNumUses());
EXPECT_EQ(nullptr, AURWArgument);
@@ -341,12 +347,12 @@ TEST_F(ValueHandle, DestroyingOtherVHOnSameValueDoesntBreakIteration) {
setValPtr(V);
ToClear[1].reset(new WeakVH(V));
}
- virtual void deleted() {
+ void deleted() override {
ToClear[0].reset();
ToClear[1].reset();
CallbackVH::deleted();
}
- virtual void allUsesReplacedWith(Value *) {
+ void allUsesReplacedWith(Value *) override {
ToClear[0].reset();
ToClear[1].reset();
}
@@ -388,7 +394,7 @@ TEST_F(ValueHandle, AssertingVHCheckedLast) {
ToClear[1] = &A1;
}
- virtual void deleted() {
+ void deleted() override {
*ToClear[0] = nullptr;
*ToClear[1] = nullptr;
CallbackVH::deleted();
diff --git a/unittests/IR/ValueMapTest.cpp b/unittests/IR/ValueMapTest.cpp
index a6bad71..1431a8d 100644
--- a/unittests/IR/ValueMapTest.cpp
+++ b/unittests/IR/ValueMapTest.cpp
@@ -194,7 +194,8 @@ struct LockMutex : ValueMapConfig<KeyT, MutexT> {
}
static MutexT *getMutex(const ExtraData &Data) { return Data.M; }
};
-#if LLVM_ENABLE_THREADS
+// FIXME: These tests started failing on Windows.
+#if LLVM_ENABLE_THREADS && !defined(LLVM_ON_WIN32)
TYPED_TEST(ValueMapTest, LocksMutex) {
sys::Mutex M(false); // Not recursive.
bool CalledRAUW = false, CalledDeleted = false;
diff --git a/unittests/LineEditor/LineEditor.cpp b/unittests/LineEditor/LineEditor.cpp
index 26053c0..4d9081f 100644
--- a/unittests/LineEditor/LineEditor.cpp
+++ b/unittests/LineEditor/LineEditor.cpp
@@ -29,7 +29,7 @@ public:
LE = new LineEditor("test", HistPath);
}
- ~LineEditorTest() {
+ ~LineEditorTest() override {
delete LE;
sys::fs::remove(HistPath.str());
}
diff --git a/unittests/Linker/LinkModulesTest.cpp b/unittests/Linker/LinkModulesTest.cpp
index fbd0363..b4689cb 100644
--- a/unittests/Linker/LinkModulesTest.cpp
+++ b/unittests/Linker/LinkModulesTest.cpp
@@ -23,7 +23,7 @@ namespace {
class LinkModuleTest : public testing::Test {
protected:
- virtual void SetUp() {
+ void SetUp() override {
M.reset(new Module("MyModule", Ctx));
FunctionType *FTy = FunctionType::get(
Type::getInt8PtrTy(Ctx), Type::getInt32Ty(Ctx), false /*=isVarArg*/);
@@ -56,7 +56,7 @@ protected:
GV->setInitializer(ConstantArray::get(AT, Init));
}
- virtual void TearDown() { M.reset(); }
+ void TearDown() override { M.reset(); }
LLVMContext Ctx;
std::unique_ptr<Module> M;
diff --git a/unittests/Support/AlignOfTest.cpp b/unittests/Support/AlignOfTest.cpp
index d209086..e0859fc 100644
--- a/unittests/Support/AlignOfTest.cpp
+++ b/unittests/Support/AlignOfTest.cpp
@@ -39,24 +39,10 @@ namespace {
#endif
// Define some fixed alignment types to use in these tests.
-#if __has_feature(cxx_alignas)
-struct alignas(1) A1 { };
-struct alignas(2) A2 { };
-struct alignas(4) A4 { };
-struct alignas(8) A8 { };
-#elif defined(__GNUC__)
-struct A1 { } __attribute__((aligned(1)));
-struct A2 { } __attribute__((aligned(2)));
-struct A4 { } __attribute__((aligned(4)));
-struct A8 { } __attribute__((aligned(8)));
-#elif defined(_MSC_VER)
-__declspec(align(1)) struct A1 { };
-__declspec(align(2)) struct A2 { };
-__declspec(align(4)) struct A4 { };
-__declspec(align(8)) struct A8 { };
-#else
-# error No supported align as directive.
-#endif
+struct LLVM_ALIGNAS(1) A1 {};
+struct LLVM_ALIGNAS(2) A2 {};
+struct LLVM_ALIGNAS(4) A4 {};
+struct LLVM_ALIGNAS(8) A8 {};
struct S1 {};
struct S2 { char a; };
@@ -75,12 +61,22 @@ struct D8 : S1, D4, D5 { double x[2]; };
struct D9 : S1, D1 { S1 s1; };
struct V1 { virtual ~V1(); };
struct V2 { int x; virtual ~V2(); };
-struct V3 : V1 { virtual ~V3(); };
-struct V4 : virtual V2 { int y; virtual ~V4(); };
-struct V5 : V4, V3 { double z; virtual ~V5(); };
+struct V3 : V1 {
+ ~V3() override;
+};
+struct V4 : virtual V2 { int y;
+ ~V4() override;
+};
+struct V5 : V4, V3 { double z;
+ ~V5() override;
+};
struct V6 : S1 { virtual ~V6(); };
-struct V7 : virtual V2, virtual V6 { virtual ~V7(); };
-struct V8 : V5, virtual V6, V7 { double zz; virtual ~V8(); };
+struct V7 : virtual V2, virtual V6 {
+ ~V7() override;
+};
+struct V8 : V5, virtual V6, V7 { double zz;
+ ~V8() override;
+};
double S6::f() { return 0.0; }
float D2::g() { return 0.0f; }
diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt
index f3b55c3..3c8a090 100644
--- a/unittests/Support/CMakeLists.txt
+++ b/unittests/Support/CMakeLists.txt
@@ -44,6 +44,7 @@ add_llvm_unittest(SupportTests
YAMLParserTest.cpp
formatted_raw_ostream_test.cpp
raw_ostream_test.cpp
+ raw_pwrite_stream_test.cpp
)
# ManagedStatic.cpp uses <pthread>.
diff --git a/unittests/Support/CommandLineTest.cpp b/unittests/Support/CommandLineTest.cpp
index 9d7679d..328c4b7 100644
--- a/unittests/Support/CommandLineTest.cpp
+++ b/unittests/Support/CommandLineTest.cpp
@@ -65,9 +65,7 @@ public:
StackOption(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3)
: Base(M0, M1, M2, M3) {}
- ~StackOption() {
- this->removeArgument();
- }
+ ~StackOption() override { this->removeArgument(); }
};
diff --git a/unittests/Support/MemoryBufferTest.cpp b/unittests/Support/MemoryBufferTest.cpp
index 1cdd6ad..ffb809a 100644
--- a/unittests/Support/MemoryBufferTest.cpp
+++ b/unittests/Support/MemoryBufferTest.cpp
@@ -26,7 +26,7 @@ protected:
: data("this is some data")
{ }
- virtual void SetUp() { }
+ void SetUp() override {}
/// Common testing for different modes of getOpenFileSlice.
/// Creates a temporary file with known contents, and uses
diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp
index 479812c..262d272 100644
--- a/unittests/Support/Path.cpp
+++ b/unittests/Support/Path.cpp
@@ -308,7 +308,7 @@ protected:
/// be placed. It is removed at the end of each test (must be empty).
SmallString<128> TestDirectory;
- virtual void SetUp() {
+ void SetUp() override {
ASSERT_NO_ERROR(
fs::createUniqueDirectory("file-system-test", TestDirectory));
// We don't care about this specific file.
@@ -316,9 +316,7 @@ protected:
errs().flush();
}
- virtual void TearDown() {
- ASSERT_NO_ERROR(fs::remove(TestDirectory.str()));
- }
+ void TearDown() override { ASSERT_NO_ERROR(fs::remove(TestDirectory.str())); }
};
TEST_F(FileSystemTest, Unique) {
diff --git a/unittests/Support/raw_pwrite_stream_test.cpp b/unittests/Support/raw_pwrite_stream_test.cpp
new file mode 100644
index 0000000..bcbb29b
--- /dev/null
+++ b/unittests/Support/raw_pwrite_stream_test.cpp
@@ -0,0 +1,25 @@
+//===- raw_pwrite_stream_test.cpp - raw_pwrite_stream tests ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(raw_pwrite_ostreamTest, TestSVector) {
+ SmallString<64> Buffer;
+ raw_svector_ostream OS(Buffer);
+ StringRef Test = "test";
+ OS.pwrite(Test.data(), Test.size(), 0);
+ EXPECT_EQ(Test, OS.str());
+}
+}
diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp
index 8374099..636cb3c 100644
--- a/unittests/Transforms/Utils/Cloning.cpp
+++ b/unittests/Transforms/Utils/Cloning.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
#include "gtest/gtest.h"
using namespace llvm;
@@ -30,9 +31,7 @@ namespace {
class CloneInstruction : public ::testing::Test {
protected:
- virtual void SetUp() {
- V = nullptr;
- }
+ void SetUp() override { V = nullptr; }
template <typename T>
T *clone(T *V1) {
@@ -46,7 +45,7 @@ protected:
DeleteContainerPointers(Clones);
}
- virtual void TearDown() {
+ void TearDown() override {
eraseClones();
DeleteContainerPointers(Orig);
delete V;
@@ -205,16 +204,14 @@ TEST_F(CloneInstruction, CallingConvention) {
class CloneFunc : public ::testing::Test {
protected:
- virtual void SetUp() {
+ void SetUp() override {
SetupModule();
CreateOldFunc();
CreateNewFunc();
SetupFinder();
}
- virtual void TearDown() {
- delete Finder;
- }
+ void TearDown() override { delete Finder; }
void SetupModule() {
M = new Module("", C);
@@ -233,7 +230,8 @@ protected:
// Function DI
DIFile File = DBuilder.createFile("filename.c", "/file/dir/");
DITypeArray ParamTypes = DBuilder.getOrCreateTypeArray(None);
- DICompositeType FuncType = DBuilder.createSubroutineType(File, ParamTypes);
+ MDSubroutineType *FuncType =
+ DBuilder.createSubroutineType(File, ParamTypes);
DICompileUnit CU = DBuilder.createCompileUnit(dwarf::DW_LANG_C99,
"filename.c", "/file/dir", "CloneFunc", false, "", 0);
@@ -258,8 +256,10 @@ protected:
DIExpression E = DBuilder.createExpression();
DIVariable Variable = DBuilder.createLocalVariable(
dwarf::DW_TAG_auto_variable, Subprogram, "x", File, 5, IntType, true);
- DBuilder.insertDeclare(Alloca, Variable, E, Store);
- DBuilder.insertDbgValueIntrinsic(AllocaContent, 0, Variable, E, Terminator);
+ auto *DL = MDLocation::get(Subprogram->getContext(), 5, 0, Subprogram);
+ DBuilder.insertDeclare(Alloca, Variable, E, DL, Store);
+ DBuilder.insertDbgValueIntrinsic(AllocaContent, 0, Variable, E, DL,
+ Terminator);
// Finalize the debug info
DBuilder.finalize();
@@ -297,38 +297,41 @@ TEST_F(CloneFunc, NewFunctionCreated) {
// Test that a new subprogram entry was added and is pointing to the new
// function, while the original subprogram still points to the old one.
TEST_F(CloneFunc, Subprogram) {
+ EXPECT_FALSE(verifyModule(*M));
+
unsigned SubprogramCount = Finder->subprogram_count();
EXPECT_EQ(2U, SubprogramCount);
auto Iter = Finder->subprograms().begin();
- DISubprogram Sub1(*Iter);
- EXPECT_TRUE(Sub1.Verify());
+ DISubprogram Sub1 = cast<MDSubprogram>(*Iter);
Iter++;
- DISubprogram Sub2(*Iter);
- EXPECT_TRUE(Sub2.Verify());
+ DISubprogram Sub2 = cast<MDSubprogram>(*Iter);
- EXPECT_TRUE((Sub1.getFunction() == OldFunc && Sub2.getFunction() == NewFunc)
- || (Sub1.getFunction() == NewFunc && Sub2.getFunction() == OldFunc));
+ EXPECT_TRUE(
+ (Sub1->getFunction() == OldFunc && Sub2->getFunction() == NewFunc) ||
+ (Sub1->getFunction() == NewFunc && Sub2->getFunction() == OldFunc));
}
// Test that the new subprogram entry was not added to the CU which doesn't
// contain the old subprogram entry.
TEST_F(CloneFunc, SubprogramInRightCU) {
+ EXPECT_FALSE(verifyModule(*M));
+
EXPECT_EQ(2U, Finder->compile_unit_count());
auto Iter = Finder->compile_units().begin();
- DICompileUnit CU1(*Iter);
- EXPECT_TRUE(CU1.Verify());
+ DICompileUnit CU1 = cast<MDCompileUnit>(*Iter);
Iter++;
- DICompileUnit CU2(*Iter);
- EXPECT_TRUE(CU2.Verify());
- EXPECT_TRUE(CU1.getSubprograms().getNumElements() == 0
- || CU2.getSubprograms().getNumElements() == 0);
+ DICompileUnit CU2 = cast<MDCompileUnit>(*Iter);
+ EXPECT_TRUE(CU1->getSubprograms().size() == 0 ||
+ CU2->getSubprograms().size() == 0);
}
// Test that instructions in the old function still belong to it in the
// metadata, while instruction in the new function belong to the new one.
TEST_F(CloneFunc, InstructionOwnership) {
+ EXPECT_FALSE(verifyModule(*M));
+
inst_iterator OldIter = inst_begin(OldFunc);
inst_iterator OldEnd = inst_end(OldFunc);
inst_iterator NewIter = inst_begin(NewFunc);
@@ -346,14 +349,12 @@ TEST_F(CloneFunc, InstructionOwnership) {
// Verify that the debug location data is the same
EXPECT_EQ(OldDL.getLine(), NewDL.getLine());
EXPECT_EQ(OldDL.getCol(), NewDL.getCol());
-
+
// But that they belong to different functions
- DISubprogram OldSubprogram(OldDL.getScope(C));
- DISubprogram NewSubprogram(NewDL.getScope(C));
- EXPECT_TRUE(OldSubprogram.Verify());
- EXPECT_TRUE(NewSubprogram.Verify());
- EXPECT_EQ(OldFunc, OldSubprogram.getFunction());
- EXPECT_EQ(NewFunc, NewSubprogram.getFunction());
+ auto *OldSubprogram = cast<MDSubprogram>(OldDL.getScope());
+ auto *NewSubprogram = cast<MDSubprogram>(NewDL.getScope());
+ EXPECT_EQ(OldFunc, OldSubprogram->getFunction());
+ EXPECT_EQ(NewFunc, NewSubprogram->getFunction());
}
++OldIter;
@@ -366,6 +367,8 @@ TEST_F(CloneFunc, InstructionOwnership) {
// Test that the arguments for debug intrinsics in the new function were
// properly cloned
TEST_F(CloneFunc, DebugIntrinsics) {
+ EXPECT_FALSE(verifyModule(*M));
+
inst_iterator OldIter = inst_begin(OldFunc);
inst_iterator OldEnd = inst_end(OldFunc);
inst_iterator NewIter = inst_begin(NewFunc);
@@ -385,21 +388,25 @@ TEST_F(CloneFunc, DebugIntrinsics) {
getParent()->getParent());
// Old variable must belong to the old function
- EXPECT_EQ(OldFunc, DISubprogram(DIVariable(OldIntrin->getVariable())
- .getContext()).getFunction());
+ EXPECT_EQ(OldFunc,
+ cast<MDSubprogram>(OldIntrin->getVariable()->getScope())
+ ->getFunction());
// New variable must belong to the New function
- EXPECT_EQ(NewFunc, DISubprogram(DIVariable(NewIntrin->getVariable())
- .getContext()).getFunction());
+ EXPECT_EQ(NewFunc,
+ cast<MDSubprogram>(NewIntrin->getVariable()->getScope())
+ ->getFunction());
} else if (DbgValueInst* OldIntrin = dyn_cast<DbgValueInst>(&OldI)) {
DbgValueInst* NewIntrin = dyn_cast<DbgValueInst>(&NewI);
EXPECT_TRUE(NewIntrin);
// Old variable must belong to the old function
- EXPECT_EQ(OldFunc, DISubprogram(DIVariable(OldIntrin->getVariable())
- .getContext()).getFunction());
+ EXPECT_EQ(OldFunc,
+ cast<MDSubprogram>(OldIntrin->getVariable()->getScope())
+ ->getFunction());
// New variable must belong to the New function
- EXPECT_EQ(NewFunc, DISubprogram(DIVariable(NewIntrin->getVariable())
- .getContext()).getFunction());
+ EXPECT_EQ(NewFunc,
+ cast<MDSubprogram>(NewIntrin->getVariable()->getScope())
+ ->getFunction());
}
++OldIter;
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index 447b7c8..389889a 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -278,12 +278,15 @@ static void UnescapeString(std::string &Str) {
void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
Record *AsmWriter = Target.getAsmWriter();
std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
+ unsigned PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget");
O <<
"/// printInstruction - This method is automatically generated by tablegen\n"
"/// from the instruction set description.\n"
"void " << Target.getName() << ClassName
- << "::printInstruction(const MCInst *MI, raw_ostream &O) {\n";
+ << "::printInstruction(const MCInst *MI, "
+ << (PassSubtarget ? "const MCSubtargetInfo &STI, " : "")
+ << "raw_ostream &O) {\n";
// Build an aggregate string, and build a table of offsets into it.
SequenceToOffsetTable<std::string> StringTable;
@@ -787,6 +790,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
// Emit the method that prints the alias instruction.
std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
unsigned Variant = AsmWriter->getValueAsInt("Variant");
+ unsigned PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget");
std::vector<Record*> AllInstAliases =
Records.getAllDerivedDefinitions("InstAlias");
@@ -949,7 +953,8 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
HeaderO << "bool " << Target.getName() << ClassName
<< "::printAliasInstr(const MCInst"
- << " *MI, raw_ostream &OS) {\n";
+ << " *MI, " << (PassSubtarget ? "const MCSubtargetInfo &STI, " : "")
+ << "raw_ostream &OS) {\n";
std::string Cases;
raw_string_ostream CasesO(Cases);
@@ -1027,9 +1032,13 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
O << " ++I;\n";
O << " int OpIdx = AsmString[I++] - 1;\n";
O << " int PrintMethodIdx = AsmString[I++] - 1;\n";
- O << " printCustomAliasOperand(MI, OpIdx, PrintMethodIdx, OS);\n";
+ O << " printCustomAliasOperand(MI, OpIdx, PrintMethodIdx, ";
+ O << (PassSubtarget ? "STI, " : "");
+ O << "OS);\n";
O << " } else\n";
- O << " printOperand(MI, unsigned(AsmString[I++]) - 1, OS);\n";
+ O << " printOperand(MI, unsigned(AsmString[I++]) - 1, ";
+ O << (PassSubtarget ? "STI, " : "");
+ O << "OS);\n";
O << " } else {\n";
O << " OS << AsmString[I++];\n";
O << " }\n";
@@ -1046,7 +1055,9 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
O << "void " << Target.getName() << ClassName << "::"
<< "printCustomAliasOperand(\n"
<< " const MCInst *MI, unsigned OpIdx,\n"
- << " unsigned PrintMethodIdx, raw_ostream &OS) {\n";
+ << " unsigned PrintMethodIdx,\n"
+ << (PassSubtarget ? " const MCSubtargetInfo &STI,\n" : "")
+ << " raw_ostream &OS) {\n";
if (PrintMethods.empty())
O << " llvm_unreachable(\"Unknown PrintMethod kind\");\n";
else {
@@ -1057,7 +1068,8 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
for (unsigned i = 0; i < PrintMethods.size(); ++i) {
O << " case " << i << ":\n"
- << " " << PrintMethods[i] << "(MI, OpIdx, OS);\n"
+ << " " << PrintMethods[i] << "(MI, OpIdx, "
+ << (PassSubtarget ? "STI, " : "") << "OS);\n"
<< " break;\n";
}
O << " }\n";
@@ -1094,7 +1106,8 @@ AsmWriterEmitter::AsmWriterEmitter(RecordKeeper &R) : Records(R), Target(R) {
for (const CodeGenInstruction *I : Target.instructions())
if (!I->AsmString.empty() && I->TheDef->getName() != "PHI")
Instructions.push_back(
- AsmWriterInst(*I, AsmWriter->getValueAsInt("Variant")));
+ AsmWriterInst(*I, AsmWriter->getValueAsInt("Variant"),
+ AsmWriter->getValueAsInt("PassSubtarget")));
// Get the instruction numbering.
NumberedInstructions = &Target.getInstructionsByEnumValue();
diff --git a/utils/TableGen/AsmWriterInst.cpp b/utils/TableGen/AsmWriterInst.cpp
index 6ddc510..a66b1a0 100644
--- a/utils/TableGen/AsmWriterInst.cpp
+++ b/utils/TableGen/AsmWriterInst.cpp
@@ -39,6 +39,8 @@ std::string AsmWriterOperand::getCode() const {
std::string Result = Str + "(MI";
if (MIOpNo != ~0U)
Result += ", " + utostr(MIOpNo);
+ if (PassSubtarget)
+ Result += ", STI";
Result += ", O";
if (!MiModifier.empty())
Result += ", \"" + MiModifier + '"';
@@ -48,7 +50,8 @@ std::string AsmWriterOperand::getCode() const {
/// ParseAsmString - Parse the specified Instruction's AsmString into this
/// AsmWriterInst.
///
-AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
+AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant,
+ unsigned PassSubtarget) {
this->CGI = &CGI;
// NOTE: Any extensions to this code need to be mirrored in the
@@ -163,7 +166,8 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
Operands.push_back(AsmWriterOperand("PrintSpecial",
~0U,
~0U,
- Modifier));
+ Modifier,
+ PassSubtarget));
} else {
// Otherwise, normal operand.
unsigned OpNo = CGI.Operands.getOperandNamed(VarName);
@@ -171,7 +175,8 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
unsigned MIOp = OpInfo.MIOperandNo;
Operands.push_back(AsmWriterOperand(OpInfo.PrinterMethodName,
- OpNo, MIOp, Modifier));
+ OpNo, MIOp, Modifier,
+ PassSubtarget));
}
LastEmitted = VarEnd;
}
diff --git a/utils/TableGen/AsmWriterInst.h b/utils/TableGen/AsmWriterInst.h
index 6a900b7..a597e6b 100644
--- a/utils/TableGen/AsmWriterInst.h
+++ b/utils/TableGen/AsmWriterInst.h
@@ -53,6 +53,11 @@ namespace llvm {
/// an operand, specified with syntax like ${opname:modifier}.
std::string MiModifier;
+ // PassSubtarget - Pass MCSubtargetInfo to the print method if this is
+ // equal to 1.
+ // FIXME: Remove after all ports are updated.
+ unsigned PassSubtarget;
+
// To make VS STL happy
AsmWriterOperand(OpType op = isLiteralTextOperand):OperandType(op) {}
@@ -64,9 +69,10 @@ namespace llvm {
unsigned _CGIOpNo,
unsigned _MIOpNo,
const std::string &Modifier,
+ unsigned PassSubtarget,
OpType op = isMachineInstrOperand)
: OperandType(op), Str(Printer), CGIOpNo(_CGIOpNo), MIOpNo(_MIOpNo),
- MiModifier(Modifier) {}
+ MiModifier(Modifier), PassSubtarget(PassSubtarget) {}
bool operator!=(const AsmWriterOperand &Other) const {
if (OperandType != Other.OperandType || Str != Other.Str) return true;
@@ -88,7 +94,7 @@ namespace llvm {
const CodeGenInstruction *CGI;
AsmWriterInst(const CodeGenInstruction &CGI,
- unsigned Variant);
+ unsigned Variant, unsigned PassSubtarget);
/// MatchesAllButOneOp - If this instruction is exactly identical to the
/// specified instruction except for one differing operand, return the
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index 68c2716..4dd7681 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -711,6 +711,10 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
CopyCost = R->getValueAsInt("CopyCost");
Allocatable = R->getValueAsBit("isAllocatable");
AltOrderSelect = R->getValueAsString("AltOrderSelect");
+ int AllocationPriority = R->getValueAsInt("AllocationPriority");
+ if (AllocationPriority < 0 || AllocationPriority > 63)
+ PrintFatalError(R->getLoc(), "AllocationPriority out of range [0,63]");
+ this->AllocationPriority = AllocationPriority;
}
// Create an inferred register class that was missing from the .td files.
@@ -726,7 +730,8 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank,
SpillSize(Props.SpillSize),
SpillAlignment(Props.SpillAlignment),
CopyCost(0),
- Allocatable(true) {
+ Allocatable(true),
+ AllocationPriority(0) {
for (const auto R : Members)
TopoSigs.set(R->getTopoSig());
}
@@ -746,6 +751,7 @@ void CodeGenRegisterClass::inheritProperties(CodeGenRegBank &RegBank) {
CopyCost = Super.CopyCost;
Allocatable = Super.Allocatable;
AltOrderSelect = Super.AltOrderSelect;
+ AllocationPriority = Super.AllocationPriority;
// Copy all allocation orders, filter out foreign registers from the larger
// super-class.
@@ -1774,7 +1780,7 @@ void CodeGenRegBank::computeRegUnitLaneMasks() {
const CodeGenRegister *SubRegister = S->second;
unsigned LaneMask = SubRegIndex->LaneMask;
// Distribute LaneMask to Register Units touched.
- for (const auto &SUI : SubRegister->getRegUnits()) {
+ for (unsigned SUI : SubRegister->getRegUnits()) {
bool Found = false;
unsigned u = 0;
for (unsigned RU : RegUnits) {
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index 00e2957..dc44143 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -306,6 +306,7 @@ namespace llvm {
int CopyCost;
bool Allocatable;
std::string AltOrderSelect;
+ uint8_t AllocationPriority;
/// Contains the combination of the lane masks of all subregisters.
unsigned LaneMask;
/// True if there are at least 2 subregisters which do not interfere.
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index 9df3b41..a8a6ba5 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -194,7 +194,7 @@ public:
ScopeMatcher(ArrayRef<Matcher *> children)
: Matcher(Scope), Children(children.begin(), children.end()) {
}
- virtual ~ScopeMatcher();
+ ~ScopeMatcher() override;
unsigned getNumChildren() const { return Children.size(); }
@@ -507,7 +507,7 @@ class SwitchOpcodeMatcher : public Matcher {
public:
SwitchOpcodeMatcher(ArrayRef<std::pair<const SDNodeInfo*, Matcher*> > cases)
: Matcher(SwitchOpcode), Cases(cases.begin(), cases.end()) {}
- virtual ~SwitchOpcodeMatcher();
+ ~SwitchOpcodeMatcher() override;
static inline bool classof(const Matcher *N) {
return N->getKind() == SwitchOpcode;
@@ -561,7 +561,7 @@ class SwitchTypeMatcher : public Matcher {
public:
SwitchTypeMatcher(ArrayRef<std::pair<MVT::SimpleValueType, Matcher*> > cases)
: Matcher(SwitchType), Cases(cases.begin(), cases.end()) {}
- virtual ~SwitchTypeMatcher();
+ ~SwitchTypeMatcher() override;
static inline bool classof(const Matcher *N) {
return N->getKind() == SwitchType;
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index c69b89b..a4a46b3 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -1112,6 +1112,7 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
unsigned Opc) const {
ListInit *Predicates =
AllInstructions[Opc]->TheDef->getValueAsListInit("Predicates");
+ bool IsFirstEmission = true;
for (unsigned i = 0; i < Predicates->getSize(); ++i) {
Record *Pred = Predicates->getElementAsRecord(i);
if (!Pred->getValue("AssemblerMatcherPredicate"))
@@ -1122,7 +1123,7 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
if (!P.length())
continue;
- if (i != 0)
+ if (!IsFirstEmission)
o << " && ";
StringRef SR(P);
@@ -1133,6 +1134,7 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
pairs = pairs.second.split(',');
}
emitSinglePredicateMatch(o, pairs.first, Emitter->PredicateNamespace);
+ IsFirstEmission = false;
}
return Predicates->getSize() > 0;
}
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 17bee6e..a8423a9 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -233,7 +233,7 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
OS << "// Get the name of this register unit pressure set.\n"
<< "const char *" << ClassName << "::\n"
<< "getRegPressureSetName(unsigned Idx) const {\n"
- << " static const char *PressureNameTable[] = {\n";
+ << " static const char *const PressureNameTable[] = {\n";
unsigned MaxRegUnitWeight = 0;
for (unsigned i = 0; i < NumSets; ++i ) {
const RegUnitSet &RegUnits = RegBank.getRegSetAt(i);
@@ -752,7 +752,7 @@ RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
Idx += Sequence.size() + 1;
}
OS << " };\n"
- " static const MaskRolOp *CompositeSequences[] = {\n";
+ " static const MaskRolOp *const CompositeSequences[] = {\n";
for (size_t i = 0, e = SubRegIndices.size(); i != e; ++i) {
OS << " ";
unsigned Idx = SubReg2SequenceIndexMap[i];
@@ -1287,6 +1287,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
<< "SubClassMask,\n SuperRegIdxSeqs + "
<< SuperRegIdxSeqs.get(SuperRegIdxLists[RC.EnumValue]) << ",\n "
<< format("0x%08x,\n ", RC.LaneMask)
+ << (unsigned)RC.AllocationPriority << ",\n "
<< (RC.HasDisjunctSubRegs?"true":"false")
<< ", /* HasDisjunctSubRegs */\n ";
if (RC.getSuperClasses().empty())
diff --git a/utils/TableGen/X86DisassemblerShared.h b/utils/TableGen/X86DisassemblerShared.h
index 5895277..e5889e9 100644
--- a/utils/TableGen/X86DisassemblerShared.h
+++ b/utils/TableGen/X86DisassemblerShared.h
@@ -10,7 +10,7 @@
#ifndef LLVM_UTILS_TABLEGEN_X86DISASSEMBLERSHARED_H
#define LLVM_UTILS_TABLEGEN_X86DISASSEMBLERSHARED_H
-#include <string.h>
+#include <cstring>
#include <string>
#include "../../lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h"
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index e7e292d..0f3ea9a 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -597,7 +597,8 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o,
o << "static const struct OperandSpecifier x86OperandSets[]["
<< X86_MAX_OPERANDS << "] = {\n";
- typedef std::vector<std::pair<const char *, const char *> > OperandListTy;
+ typedef SmallVector<std::pair<OperandEncoding, OperandType>,
+ X86_MAX_OPERANDS> OperandListTy;
std::map<OperandListTy, unsigned> OperandSets;
unsigned OperandSetNum = 0;
@@ -606,12 +607,10 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o,
for (unsigned OperandIndex = 0; OperandIndex < X86_MAX_OPERANDS;
++OperandIndex) {
- const char *Encoding =
- stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[Index]
- .operands[OperandIndex].encoding);
- const char *Type =
- stringForOperandType((OperandType)InstructionSpecifiers[Index]
- .operands[OperandIndex].type);
+ OperandEncoding Encoding = (OperandEncoding)InstructionSpecifiers[Index]
+ .operands[OperandIndex].encoding;
+ OperandType Type = (OperandType)InstructionSpecifiers[Index]
+ .operands[OperandIndex].type;
OperandList.push_back(std::make_pair(Encoding, Type));
}
unsigned &N = OperandSets[OperandList];
@@ -621,8 +620,9 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o,
o << " { /* " << (OperandSetNum - 1) << " */\n";
for (unsigned i = 0, e = OperandList.size(); i != e; ++i) {
- o << " { " << OperandList[i].first << ", "
- << OperandList[i].second << " },\n";
+ const char *Encoding = stringForOperandEncoding(OperandList[i].first);
+ const char *Type = stringForOperandType(OperandList[i].second);
+ o << " { " << Encoding << ", " << Type << " },\n";
}
o << " },\n";
}
@@ -634,32 +634,24 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o,
i++;
for (unsigned index = 0; index < NumInstructions; ++index) {
- o.indent(i * 2) << "{ /* " << index << " */" << "\n";
+ o.indent(i * 2) << "{ /* " << index << " */\n";
i++;
OperandListTy OperandList;
for (unsigned OperandIndex = 0; OperandIndex < X86_MAX_OPERANDS;
++OperandIndex) {
- const char *Encoding =
- stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[index]
- .operands[OperandIndex].encoding);
- const char *Type =
- stringForOperandType((OperandType)InstructionSpecifiers[index]
- .operands[OperandIndex].type);
+ OperandEncoding Encoding = (OperandEncoding)InstructionSpecifiers[index]
+ .operands[OperandIndex].encoding;
+ OperandType Type = (OperandType)InstructionSpecifiers[index]
+ .operands[OperandIndex].type;
OperandList.push_back(std::make_pair(Encoding, Type));
}
o.indent(i * 2) << (OperandSets[OperandList] - 1) << ",\n";
- o.indent(i * 2) << "/* " << InstructionSpecifiers[index].name << " */";
- o << "\n";
+ o.indent(i * 2) << "/* " << InstructionSpecifiers[index].name << " */\n";
i--;
- o.indent(i * 2) << "}";
-
- if (index + 1 < NumInstructions)
- o << ",";
-
- o << "\n";
+ o.indent(i * 2) << "},\n";
}
i--;
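
The rewrite above keys the operand-set deduplication map on the
(OperandEncoding, OperandType) enum values instead of on the C-string
pointers returned by stringForOperandEncoding/stringForOperandType, and only
converts to strings when the table rows are printed. The underlying
dedup-by-value pattern, sketched in Python with invented operand names:

    # Identical operand lists get a single table entry; per-instruction rows
    # then refer to that entry by index (1-based in the map, 0-based when
    # emitted), mirroring "if (!N) N = ++OperandSetNum" above.
    operand_sets = {}
    rows = []
    instructions = [
        (("ENCODING_RM", "TYPE_Rv"), ("ENCODING_REG", "TYPE_Rv")),
        (("ENCODING_RM", "TYPE_Rv"), ("ENCODING_REG", "TYPE_Rv")),  # duplicate
        (("ENCODING_IB", "TYPE_IMM"),),
    ]
    for operands in instructions:
        index = operand_sets.setdefault(operands, len(operand_sets) + 1)
        rows.append(index - 1)

    print(len(operand_sets), rows)  # 2 [0, 0, 1]
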
diff --git a/utils/create_ladder_graph.py b/utils/create_ladder_graph.py
new file mode 100644
index 0000000..d29e3ad
--- /dev/null
+++ b/utils/create_ladder_graph.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+"""A ladder graph creation program.
+
+This is a python program that creates c source code that will generate
+CFGs that are ladder graphs. Ladder graphs are generally the worst case
+for a lot of dominance related algorithms (Dominance frontiers, etc),
+and often generate N^2 or worse behavior.
+
+One good use of this program is to test whether your linear time algorithm is
+really behaving linearly.
+"""
+
+import argparse
+def main():
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument('rungs', type=int,
+ help="Number of ladder rungs. Must be a multiple of 2")
+ args = parser.parse_args()
+ if (args.rungs % 2) != 0:
+ print "Rungs must be a multiple of 2"
+ return
+ print "int ladder(int *foo, int *bar, int x) {"
+ rung1 = xrange(0, args.rungs, 2)
+ rung2 = xrange(1, args.rungs, 2)
+ for i in rung1:
+ print "rung1%d:" % i
+ print "*foo = x++;"
+ if i != rung1[-1]:
+ print "if (*bar) goto rung1%d;" % (i+2)
+ print "else goto rung2%d;" % (i+1)
+ else:
+ print "goto rung2%d;" % (i+1)
+ for i in rung2:
+ print "rung2%d:" % i
+ print "*foo = x++;"
+ if i != rung2[-1]:
+ print "goto rung2%d;" % (i+2)
+ else:
+ print "return *foo;"
+ print "}"
+
+if __name__ == '__main__':
+ main()
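
As the docstring above says, the generated C is meant for stress-testing
dominance-related algorithms for super-linear behavior. A hypothetical
driver for that experiment (the clang/opt invocations and file names here
are assumptions for illustration, not part of this patch):

    import subprocess
    import time

    # Generate ladder CFGs of increasing size and time a dominator-tree
    # analysis over each; roughly linear growth in elapsed time suggests the
    # algorithm under test really is linear.
    for rungs in (1000, 2000, 4000, 8000):
        c_src = subprocess.check_output(
            ["python", "utils/create_ladder_graph.py", str(rungs)])
        with open("ladder.c", "wb") as f:
            f.write(c_src)
        subprocess.check_call(["clang", "-S", "-emit-llvm", "ladder.c",
                               "-o", "ladder.ll"])
        start = time.time()
        subprocess.check_call(["opt", "-domtree", "-disable-output",
                               "ladder.ll"])
        print("%d rungs: %.2fs" % (rungs, time.time() - start))
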
diff --git a/utils/lit/lit/formats/googletest.py b/utils/lit/lit/formats/googletest.py
index 59ac3c5..3ce5791 100644
--- a/utils/lit/lit/formats/googletest.py
+++ b/utils/lit/lit/formats/googletest.py
@@ -53,6 +53,11 @@ class GoogleTest(TestFormat):
ln = ln[index*2:]
if ln.endswith('.'):
nested_tests.append(ln)
+ elif any([name.startswith('DISABLED_')
+ for name in nested_tests + [ln]]):
+ # Gtest will internally skip these tests. No need to launch a
+ # child process for it.
+ continue
else:
yield ''.join(nested_tests) + ln
@@ -95,7 +100,7 @@ class GoogleTest(TestFormat):
# Handle GTest parametrized and typed tests, whose name includes
# some '/'s.
testPath, namePrefix = os.path.split(testPath)
- testName = os.path.join(namePrefix, testName)
+ testName = namePrefix + '/' + testName
cmd = [testPath, '--gtest_filter=' + testName]
if litConfig.useValgrind:
@@ -107,7 +112,14 @@ class GoogleTest(TestFormat):
out, err, exitCode = lit.util.executeCommand(
cmd, env=test.config.environment)
- if not exitCode:
- return lit.Test.PASS,''
+ if exitCode:
+ return lit.Test.FAIL, out + err
+
+ passing_test_line = '[  PASSED  ] 1 test.'
+ if passing_test_line not in out:
+ msg = ('Unable to find %r in gtest output:\n\n%s%s' %
+ (passing_test_line, out, err))
+ return lit.Test.UNRESOLVED, msg
+
+ return lit.Test.PASS,''
- return lit.Test.FAIL, out + err
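
Net effect of the lit change above: a zero exit code alone no longer counts
as success; the harness must also find gtest's summary line, otherwise the
test is reported UNRESOLVED. A self-contained sketch of that classification
(plain strings stand in for lit.Test's result codes; this is not the lit
code itself):

    PASS, FAIL, UNRESOLVED = "PASS", "FAIL", "UNRESOLVED"

    def classify(exit_code, out, err):
        # Same decision order as the updated GoogleTest.execute().
        if exit_code:
            return FAIL, out + err
        passing_test_line = '[  PASSED  ] 1 test.'
        if passing_test_line not in out:
            msg = ('Unable to find %r in gtest output:\n\n%s%s' %
                   (passing_test_line, out, err))
            return UNRESOLVED, msg
        return PASS, ''

    # A test binary that crashes its reporting but still exits 0 is caught:
    print(classify(0, '[==========] Running 1 test.\n', ''))  # UNRESOLVED
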
diff --git a/utils/unittest/UnitTestMain/TestMain.cpp b/utils/unittest/UnitTestMain/TestMain.cpp
index 5387512..f5b09a5 100644
--- a/utils/unittest/UnitTestMain/TestMain.cpp
+++ b/utils/unittest/UnitTestMain/TestMain.cpp
@@ -23,7 +23,7 @@
const char *TestMainArgv0;
int main(int argc, char **argv) {
- llvm::sys::PrintStackTraceOnErrorSignal();
+ llvm::sys::PrintStackTraceOnErrorSignal(true /* Disable crash reporting */);
testing::InitGoogleTest(&argc, argv);
llvm::cl::ParseCommandLineOptions(argc, argv);
diff --git a/utils/unittest/googletest/include/gtest/gtest-spi.h b/utils/unittest/googletest/include/gtest/gtest-spi.h
index b226e55..736f692 100644
--- a/utils/unittest/googletest/include/gtest/gtest-spi.h
+++ b/utils/unittest/googletest/include/gtest/gtest-spi.h
@@ -68,14 +68,15 @@ class GTEST_API_ ScopedFakeTestPartResultReporter
TestPartResultArray* result);
// The d'tor restores the previous test part result reporter.
- virtual ~ScopedFakeTestPartResultReporter();
+ ~ScopedFakeTestPartResultReporter() override;
// Appends the TestPartResult object to the TestPartResultArray
// received in the constructor.
//
// This method is from the TestPartResultReporterInterface
// interface.
- virtual void ReportTestPartResult(const TestPartResult& result);
+ void ReportTestPartResult(const TestPartResult &result) override;
+
private:
void Init();
diff --git a/utils/unittest/googletest/include/gtest/gtest-test-part.h b/utils/unittest/googletest/include/gtest/gtest-test-part.h
index 98e8b84..d2410c0 100644
--- a/utils/unittest/googletest/include/gtest/gtest-test-part.h
+++ b/utils/unittest/googletest/include/gtest/gtest-test-part.h
@@ -159,8 +159,8 @@ class GTEST_API_ HasNewFatalFailureHelper
: public TestPartResultReporterInterface {
public:
HasNewFatalFailureHelper();
- virtual ~HasNewFatalFailureHelper();
- virtual void ReportTestPartResult(const TestPartResult& result);
+ ~HasNewFatalFailureHelper() override;
+ void ReportTestPartResult(const TestPartResult &result) override;
bool has_new_fatal_failure() const { return has_new_fatal_failure_; }
private:
bool has_new_fatal_failure_;
diff --git a/utils/unittest/googletest/include/gtest/gtest.h b/utils/unittest/googletest/include/gtest/gtest.h
index 257cee6..92ca5cc 100644
--- a/utils/unittest/googletest/include/gtest/gtest.h
+++ b/utils/unittest/googletest/include/gtest/gtest.h
@@ -982,21 +982,22 @@ class TestEventListener {
class EmptyTestEventListener : public TestEventListener {
virtual void anchor();
public:
- virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
- virtual void OnTestIterationStart(const UnitTest& /*unit_test*/,
- int /*iteration*/) {}
- virtual void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) {}
- virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
- virtual void OnTestCaseStart(const TestCase& /*test_case*/) {}
- virtual void OnTestStart(const TestInfo& /*test_info*/) {}
- virtual void OnTestPartResult(const TestPartResult& /*test_part_result*/) {}
- virtual void OnTestEnd(const TestInfo& /*test_info*/) {}
- virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {}
- virtual void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) {}
- virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
- virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/,
- int /*iteration*/) {}
- virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}
+ void OnTestProgramStart(const UnitTest & /*unit_test*/) override {}
+ void OnTestIterationStart(const UnitTest & /*unit_test*/,
+ int /*iteration*/) override {}
+ void OnEnvironmentsSetUpStart(const UnitTest & /*unit_test*/) override {}
+ void OnEnvironmentsSetUpEnd(const UnitTest & /*unit_test*/) override {}
+ void OnTestCaseStart(const TestCase & /*test_case*/) override {}
+ void OnTestStart(const TestInfo & /*test_info*/) override {}
+ void OnTestPartResult(const TestPartResult & /*test_part_result*/) override {
+ }
+ void OnTestEnd(const TestInfo & /*test_info*/) override {}
+ void OnTestCaseEnd(const TestCase & /*test_case*/) override {}
+ void OnEnvironmentsTearDownStart(const UnitTest & /*unit_test*/) override {}
+ void OnEnvironmentsTearDownEnd(const UnitTest & /*unit_test*/) override {}
+ void OnTestIterationEnd(const UnitTest & /*unit_test*/,
+ int /*iteration*/) override {}
+ void OnTestProgramEnd(const UnitTest & /*unit_test*/) override {}
};
// TestEventListeners lets users add listeners to track events in Google Test.
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-death-test-internal.h b/utils/unittest/googletest/include/gtest/internal/gtest-death-test-internal.h
index 8d53c45..04c676c 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-death-test-internal.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-death-test-internal.h
@@ -147,8 +147,8 @@ class DeathTestFactory {
// A concrete DeathTestFactory implementation for normal use.
class DefaultDeathTestFactory : public DeathTestFactory {
public:
- virtual bool Create(const char* statement, const RE* regex,
- const char* file, int line, DeathTest** test);
+ bool Create(const char *statement, const RE *regex, const char *file,
+ int line, DeathTest **test) override;
};
// Returns true if exit_status describes a process that was terminated
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-internal.h b/utils/unittest/googletest/include/gtest/internal/gtest-internal.h
index 63f72ac..3c7eee8 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-internal.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-internal.h
@@ -555,7 +555,7 @@ class TestFactoryBase {
template <class TestClass>
class TestFactoryImpl : public TestFactoryBase {
public:
- virtual Test* CreateTest() { return new TestClass; }
+ Test *CreateTest() override { return new TestClass; }
};
#if GTEST_OS_WINDOWS
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-param-util.h b/utils/unittest/googletest/include/gtest/internal/gtest-param-util.h
index 3bb2ffb..dea4d5c 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-param-util.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-param-util.h
@@ -270,12 +270,12 @@ class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
template <typename ForwardIterator>
ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end)
: container_(begin, end) {}
- virtual ~ValuesInIteratorRangeGenerator() {}
+ ~ValuesInIteratorRangeGenerator() override {}
- virtual ParamIteratorInterface<T>* Begin() const {
+ ParamIteratorInterface<T> *Begin() const override {
return new Iterator(this, container_.begin());
}
- virtual ParamIteratorInterface<T>* End() const {
+ ParamIteratorInterface<T> *End() const override {
return new Iterator(this, container_.end());
}
@@ -287,16 +287,16 @@ class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
Iterator(const ParamGeneratorInterface<T>* base,
typename ContainerType::const_iterator iterator)
: base_(base), iterator_(iterator) {}
- virtual ~Iterator() {}
+ ~Iterator() override {}
- virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
+ const ParamGeneratorInterface<T> *BaseGenerator() const override {
return base_;
}
- virtual void Advance() {
+ void Advance() override {
++iterator_;
value_.reset();
}
- virtual ParamIteratorInterface<T>* Clone() const {
+ ParamIteratorInterface<T> *Clone() const override {
return new Iterator(*this);
}
// We need to use cached value referenced by iterator_ because *iterator_
@@ -306,12 +306,12 @@ class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
// can advance iterator_ beyond the end of the range, and we cannot
// detect that fact. The client code, on the other hand, is
// responsible for not calling Current() on an out-of-range iterator.
- virtual const T* Current() const {
+ const T *Current() const override {
if (value_.get() == NULL)
value_.reset(new T(*iterator_));
return value_.get();
}
- virtual bool Equals(const ParamIteratorInterface<T>& other) const {
+ bool Equals(const ParamIteratorInterface<T> &other) const override {
// Having the same base generator guarantees that the other
// iterator is of the same type and we can downcast.
GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
@@ -355,7 +355,7 @@ class ParameterizedTestFactory : public TestFactoryBase {
typedef typename TestClass::ParamType ParamType;
explicit ParameterizedTestFactory(ParamType parameter) :
parameter_(parameter) {}
- virtual Test* CreateTest() {
+ Test *CreateTest() override {
TestClass::SetParam(&parameter_);
return new TestClass();
}
@@ -394,7 +394,7 @@ class TestMetaFactory
TestMetaFactory() {}
- virtual TestFactoryBase* CreateTestFactory(ParamType parameter) {
+ TestFactoryBase *CreateTestFactory(ParamType parameter) override {
return new ParameterizedTestFactory<TestCase>(parameter);
}
@@ -454,9 +454,9 @@ class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase {
: test_case_name_(name) {}
// Test case base name for display purposes.
- virtual const string& GetTestCaseName() const { return test_case_name_; }
+ const string &GetTestCaseName() const override { return test_case_name_; }
// Test case id to verify identity.
- virtual TypeId GetTestCaseTypeId() const { return GetTypeId<TestCase>(); }
+ TypeId GetTestCaseTypeId() const override { return GetTypeId<TestCase>(); }
// TEST_P macro uses AddTestPattern() to record information
// about a single test in a LocalTestInfo structure.
// test_case_name is the base name of the test case (without invocation
@@ -484,7 +484,7 @@ class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase {
// This method should not be called more then once on any single
// instance of a ParameterizedTestCaseInfoBase derived class.
// UnitTest has a guard to prevent from calling this method more then once.
- virtual void RegisterTests() {
+ void RegisterTests() override {
for (typename TestInfoContainer::iterator test_it = tests_.begin();
test_it != tests_.end(); ++test_it) {
linked_ptr<TestInfo> test_info = *test_it;
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-port.h b/utils/unittest/googletest/include/gtest/internal/gtest-port.h
index 9ddcea1..6b942e9 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-port.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-port.h
@@ -1165,7 +1165,7 @@ class ThreadWithParam : public ThreadWithParamBase {
GTEST_CHECK_POSIX_SUCCESS_(
pthread_create(&thread_, 0, &ThreadFuncWithCLinkage, base));
}
- ~ThreadWithParam() { Join(); }
+ ~ThreadWithParam() override { Join(); }
void Join() {
if (!finished_) {
@@ -1174,7 +1174,7 @@ class ThreadWithParam : public ThreadWithParamBase {
}
}
- virtual void Run() {
+ void Run() override {
if (thread_can_start_ != NULL)
thread_can_start_->WaitForNotification();
func_(param_);
diff --git a/utils/unittest/googletest/src/gtest-death-test.cc b/utils/unittest/googletest/src/gtest-death-test.cc
index 314dba2..47c1a15 100644
--- a/utils/unittest/googletest/src/gtest-death-test.cc
+++ b/utils/unittest/googletest/src/gtest-death-test.cc
@@ -334,10 +334,10 @@ class DeathTestImpl : public DeathTest {
write_fd_(-1) {}
// read_fd_ is expected to be closed and cleared by a derived class.
- ~DeathTestImpl() { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); }
+ ~DeathTestImpl() override { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); }
- void Abort(AbortReason reason);
- virtual bool Passed(bool status_ok);
+ void Abort(AbortReason reason) override;
+ bool Passed(bool status_ok) override;
const char* statement() const { return statement_; }
const RE* regex() const { return regex_; }
@@ -744,7 +744,7 @@ class ForkingDeathTest : public DeathTestImpl {
ForkingDeathTest(const char* statement, const RE* regex);
// All of these virtual functions are inherited from DeathTest.
- virtual int Wait();
+ int Wait() override;
protected:
void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; }
@@ -780,7 +780,7 @@ class NoExecDeathTest : public ForkingDeathTest {
public:
NoExecDeathTest(const char* a_statement, const RE* a_regex) :
ForkingDeathTest(a_statement, a_regex) { }
- virtual TestRole AssumeRole();
+ TestRole AssumeRole() override;
};
// The AssumeRole process for a fork-and-run death test. It implements a
@@ -835,7 +835,8 @@ class ExecDeathTest : public ForkingDeathTest {
ExecDeathTest(const char* a_statement, const RE* a_regex,
const char* file, int line) :
ForkingDeathTest(a_statement, a_regex), file_(file), line_(line) { }
- virtual TestRole AssumeRole();
+ TestRole AssumeRole() override;
+
private:
// The name of the file in which the death test is located.
const char* const file_;
diff --git a/utils/unittest/googletest/src/gtest-internal-inl.h b/utils/unittest/googletest/src/gtest-internal-inl.h
index 1bae630..35e865f 100644
--- a/utils/unittest/googletest/src/gtest-internal-inl.h
+++ b/utils/unittest/googletest/src/gtest-internal-inl.h
@@ -431,8 +431,8 @@ class OsStackTraceGetterInterface {
class OsStackTraceGetter : public OsStackTraceGetterInterface {
public:
OsStackTraceGetter() : caller_frame_(NULL) {}
- virtual String CurrentStackTrace(int max_depth, int skip_count);
- virtual void UponLeavingGTest();
+ String CurrentStackTrace(int max_depth, int skip_count) override;
+ void UponLeavingGTest() override;
// This string is inserted in place of stack frames that are part of
// Google Test's implementation.
@@ -465,7 +465,7 @@ class DefaultGlobalTestPartResultReporter
explicit DefaultGlobalTestPartResultReporter(UnitTestImpl* unit_test);
// Implements the TestPartResultReporterInterface. Reports the test part
// result in the current test.
- virtual void ReportTestPartResult(const TestPartResult& result);
+ void ReportTestPartResult(const TestPartResult &result) override;
private:
UnitTestImpl* const unit_test_;
@@ -481,7 +481,7 @@ class DefaultPerThreadTestPartResultReporter
explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl* unit_test);
// Implements the TestPartResultReporterInterface. The implementation just
// delegates to the current global test part result reporter of *unit_test_.
- virtual void ReportTestPartResult(const TestPartResult& result);
+ void ReportTestPartResult(const TestPartResult &result) override;
private:
UnitTestImpl* const unit_test_;
diff --git a/utils/unittest/googletest/src/gtest.cc b/utils/unittest/googletest/src/gtest.cc
index bf850c6..5780764 100644
--- a/utils/unittest/googletest/src/gtest.cc
+++ b/utils/unittest/googletest/src/gtest.cc
@@ -2663,19 +2663,19 @@ class PrettyUnitTestResultPrinter : public TestEventListener {
}
// The following methods override what's in the TestEventListener class.
- virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
- virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
- virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
- virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
- virtual void OnTestCaseStart(const TestCase& test_case);
- virtual void OnTestStart(const TestInfo& test_info);
- virtual void OnTestPartResult(const TestPartResult& result);
- virtual void OnTestEnd(const TestInfo& test_info);
- virtual void OnTestCaseEnd(const TestCase& test_case);
- virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
- virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
- virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
- virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}
+ void OnTestProgramStart(const UnitTest & /*unit_test*/) override {}
+ void OnTestIterationStart(const UnitTest &unit_test, int iteration) override;
+ void OnEnvironmentsSetUpStart(const UnitTest &unit_test) override;
+ void OnEnvironmentsSetUpEnd(const UnitTest & /*unit_test*/) override {}
+ void OnTestCaseStart(const TestCase &test_case) override;
+ void OnTestStart(const TestInfo &test_info) override;
+ void OnTestPartResult(const TestPartResult &result) override;
+ void OnTestEnd(const TestInfo &test_info) override;
+ void OnTestCaseEnd(const TestCase &test_case) override;
+ void OnEnvironmentsTearDownStart(const UnitTest &unit_test) override;
+ void OnEnvironmentsTearDownEnd(const UnitTest & /*unit_test*/) override {}
+ void OnTestIterationEnd(const UnitTest &unit_test, int iteration) override;
+ void OnTestProgramEnd(const UnitTest & /*unit_test*/) override {}
private:
static void PrintFailedTests(const UnitTest& unit_test);
@@ -2869,7 +2869,7 @@ void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
class TestEventRepeater : public TestEventListener {
public:
TestEventRepeater() : forwarding_enabled_(true) {}
- virtual ~TestEventRepeater();
+ ~TestEventRepeater() override;
void Append(TestEventListener *listener);
TestEventListener* Release(TestEventListener* listener);
@@ -2878,19 +2878,19 @@ class TestEventRepeater : public TestEventListener {
bool forwarding_enabled() const { return forwarding_enabled_; }
void set_forwarding_enabled(bool enable) { forwarding_enabled_ = enable; }
- virtual void OnTestProgramStart(const UnitTest& unit_test);
- virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
- virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
- virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test);
- virtual void OnTestCaseStart(const TestCase& test_case);
- virtual void OnTestStart(const TestInfo& test_info);
- virtual void OnTestPartResult(const TestPartResult& result);
- virtual void OnTestEnd(const TestInfo& test_info);
- virtual void OnTestCaseEnd(const TestCase& test_case);
- virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
- virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test);
- virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
- virtual void OnTestProgramEnd(const UnitTest& unit_test);
+ void OnTestProgramStart(const UnitTest &unit_test) override;
+ void OnTestIterationStart(const UnitTest &unit_test, int iteration) override;
+ void OnEnvironmentsSetUpStart(const UnitTest &unit_test) override;
+ void OnEnvironmentsSetUpEnd(const UnitTest &unit_test) override;
+ void OnTestCaseStart(const TestCase &test_case) override;
+ void OnTestStart(const TestInfo &test_info) override;
+ void OnTestPartResult(const TestPartResult &result) override;
+ void OnTestEnd(const TestInfo &test_info) override;
+ void OnTestCaseEnd(const TestCase &test_case) override;
+ void OnEnvironmentsTearDownStart(const UnitTest &unit_test) override;
+ void OnEnvironmentsTearDownEnd(const UnitTest &unit_test) override;
+ void OnTestIterationEnd(const UnitTest &unit_test, int iteration) override;
+ void OnTestProgramEnd(const UnitTest &unit_test) override;
private:
// Controls whether events will be forwarded to listeners_. Set to false
@@ -2983,7 +2983,7 @@ class XmlUnitTestResultPrinter : public EmptyTestEventListener {
public:
explicit XmlUnitTestResultPrinter(const char* output_file);
- virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
+ void OnTestIterationEnd(const UnitTest &unit_test, int iteration) override;
private:
// Is c a whitespace character that is normalized to a space character
@@ -3310,16 +3310,16 @@ class StreamingListener : public EmptyTestEventListener {
Send("gtest_streaming_protocol_version=1.0\n");
}
- virtual ~StreamingListener() {
+ ~StreamingListener() override {
if (sockfd_ != -1)
CloseConnection();
}
- void OnTestProgramStart(const UnitTest& /* unit_test */) {
+ void OnTestProgramStart(const UnitTest & /* unit_test */) override {
Send("event=TestProgramStart\n");
}
- void OnTestProgramEnd(const UnitTest& unit_test) {
+ void OnTestProgramEnd(const UnitTest &unit_test) override {
// Note that Google Test current only report elapsed time for each
// test iteration, not for the entire test program.
Send(String::Format("event=TestProgramEnd&passed=%d\n",
@@ -3329,39 +3329,41 @@ class StreamingListener : public EmptyTestEventListener {
CloseConnection();
}
- void OnTestIterationStart(const UnitTest& /* unit_test */, int iteration) {
+ void OnTestIterationStart(const UnitTest & /* unit_test */,
+ int iteration) override {
Send(String::Format("event=TestIterationStart&iteration=%d\n",
iteration));
}
- void OnTestIterationEnd(const UnitTest& unit_test, int /* iteration */) {
+ void OnTestIterationEnd(const UnitTest &unit_test,
+ int /* iteration */) override {
Send(String::Format("event=TestIterationEnd&passed=%d&elapsed_time=%sms\n",
unit_test.Passed(),
StreamableToString(unit_test.elapsed_time()).c_str()));
}
- void OnTestCaseStart(const TestCase& test_case) {
+ void OnTestCaseStart(const TestCase &test_case) override {
Send(String::Format("event=TestCaseStart&name=%s\n", test_case.name()));
}
- void OnTestCaseEnd(const TestCase& test_case) {
+ void OnTestCaseEnd(const TestCase &test_case) override {
Send(String::Format("event=TestCaseEnd&passed=%d&elapsed_time=%sms\n",
test_case.Passed(),
StreamableToString(test_case.elapsed_time()).c_str()));
}
- void OnTestStart(const TestInfo& test_info) {
+ void OnTestStart(const TestInfo &test_info) override {
Send(String::Format("event=TestStart&name=%s\n", test_info.name()));
}
- void OnTestEnd(const TestInfo& test_info) {
+ void OnTestEnd(const TestInfo &test_info) override {
Send(String::Format(
"event=TestEnd&passed=%d&elapsed_time=%sms\n",
(test_info.result())->Passed(),
StreamableToString((test_info.result())->elapsed_time()).c_str()));
}
- void OnTestPartResult(const TestPartResult& test_part_result) {
+ void OnTestPartResult(const TestPartResult &test_part_result) override {
const char* file_name = test_part_result.file_name();
if (file_name == NULL)
file_name = "";
diff --git a/utils/yaml-bench/YAMLBench.cpp b/utils/yaml-bench/YAMLBench.cpp
index 8bd1ea1..872f586 100644
--- a/utils/yaml-bench/YAMLBench.cpp
+++ b/utils/yaml-bench/YAMLBench.cpp
@@ -117,7 +117,7 @@ static void dumpNode( yaml::Node *n
outs() << indent(Indent) << "}";
} else if (yaml::AliasNode *an = dyn_cast<yaml::AliasNode>(n)){
outs() << "*" << an->getName();
- } else if (dyn_cast<yaml::NullNode>(n)) {
+ } else if (isa<yaml::NullNode>(n)) {
outs() << prettyTag(n) << " null";
}
}